/* Output routines for GCC for ARM.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "target-globals.h"
#include "tm-constrs.h"
#include "optabs-libfuncs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
					       machine_mode, int *,
					       const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
				  tree, bool);
static rtx arm_function_arg (cumulative_args_t, machine_mode,
			     const_tree, bool);
static void arm_function_arg_advance (cumulative_args_t, machine_mode,
				      const_tree, bool);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);
static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);
static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);

static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_save (struct cl_target_option *, struct gcc_options *);
static void arm_option_restore (struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_sizes (void);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
					     const unsigned char *sel);

static bool aarch_macro_fusion_pair_p (rtx_insn *, rtx_insn *);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);
static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call", 0, 0, false, true, true, NULL, false },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call", 0, 0, false, true, true, NULL, false },
  /* Specify the procedure call conventions for a function.  */
  { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
    false },
  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
  { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
    false },
  { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
    false },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
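  /* That is, write "__declspec(A) __declspec(B)" rather than
     "__declspec(A B)".  */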
  { "dllimport", 0, 0, true, false, false, NULL, false },
  { "dllexport", 0, 0, true, false, false, NULL, false },
  { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
    false },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
    false },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_entry, false },
  { "cmse_nonsecure_call", 0, 0, true, false, false,
    arm_handle_cmse_nonsecure_call, true },
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE arm_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  arm_autovectorize_vector_sizes

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
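/* Spelled out: 4088 + 1 + 4095 = 8184 = 8 * 1023, so each anchor's block
   spans a whole number of eight-byte units.  */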
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  arm_vectorize_vec_perm_const_ok

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char * minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */

/* Nonzero if this chip supports the ARM 6K extensions.  */

/* Nonzero if this chip supports the ARM 6KZ extensions.  */

/* Nonzero if instructions present in ARMv6-M can be used.  */

/* Nonzero if this chip supports the ARM 7 extensions.  */

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */

/* Nonzero if instructions present in ARMv8 can be used.  */

/* Nonzero if this chip supports the ARMv8.1 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */

/* Nonzero if chip supports Thumb 2.  */

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn. */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)
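
/* Candidate work (scratch) registers for Thumb-2 code: the low registers
   r0-r7 (the 0xff mask below), with the hard frame pointer, stack pointer,
   program counter and PIC register bits cleared.  */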
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

enum processor_type scheduler;
unsigned int tune_flags;
const struct tune_params *tune;
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  {				\
    num_slots,			\
    l1_size,			\
    l1_line_size		\
  }
/* arm generic vectorizer costs.  */
struct cpu_vec_costs arm_default_vec_cost = {
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  1, /* vec_unalign_load_cost. */
  1, /* vec_unalign_store_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"
const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1), /* shift_reg. */
  COSTS_N_INSNS (1), /* arith_shift. */
  COSTS_N_INSNS (2), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (2), /* extend_arith. */
  COSTS_N_INSNS (1), /* bfi. */
  COSTS_N_INSNS (1), /* bfx. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (3), /* simple. */
  COSTS_N_INSNS (3), /* flag_setting. */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (3), /* add. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (4), /* extend. */
  COSTS_N_INSNS (4), /* extend_add. */
  COSTS_N_INSNS (2), /* load. */
  COSTS_N_INSNS (2), /* load_sign_extend. */
  COSTS_N_INSNS (2), /* ldrd. */
  COSTS_N_INSNS (2), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (5), /* loadf. */
  COSTS_N_INSNS (5), /* loadd. */
  COSTS_N_INSNS (1), /* load_unaligned. */
  COSTS_N_INSNS (2), /* store. */
  COSTS_N_INSNS (2), /* strd. */
  COSTS_N_INSNS (2), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (1), /* storef. */
  COSTS_N_INSNS (1), /* stored. */
  COSTS_N_INSNS (1), /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (14), /* div. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (7), /* mult_addsub. */
  COSTS_N_INSNS (30), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (1), /* fpconst. */
  COSTS_N_INSNS (1), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (24), /* div. */
  COSTS_N_INSNS (5), /* mult. */
  COSTS_N_INSNS (8), /* mult_addsub. */
  COSTS_N_INSNS (30), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (1), /* fpconst. */
  COSTS_N_INSNS (1), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (1) /* alu. */

const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1), /* shift. */
  COSTS_N_INSNS (1), /* arith_shift. */
  0, /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift. */
  0, /* log_shift_reg. */
  0, /* extend_arith. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (1), /* simple. */
  COSTS_N_INSNS (1), /* flag_setting. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (1), /* add. */
  COSTS_N_INSNS (1), /* extend_add. */
  COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (1), /* load. */
  COSTS_N_INSNS (1), /* load_sign_extend. */
  COSTS_N_INSNS (1), /* ldrd. */
  COSTS_N_INSNS (1), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (1), /* loadf. */
  COSTS_N_INSNS (1), /* loadd. */
  COSTS_N_INSNS (1), /* load_unaligned. */
  COSTS_N_INSNS (1), /* store. */
  COSTS_N_INSNS (1), /* strd. */
  COSTS_N_INSNS (1), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (1), /* storef. */
  COSTS_N_INSNS (1), /* stored. */
  COSTS_N_INSNS (1), /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (36), /* div. */
  COSTS_N_INSNS (11), /* mult. */
  COSTS_N_INSNS (20), /* mult_addsub. */
  COSTS_N_INSNS (30), /* fma. */
  COSTS_N_INSNS (9), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (6), /* compare. */
  COSTS_N_INSNS (4), /* widen. */
  COSTS_N_INSNS (4), /* narrow. */
  COSTS_N_INSNS (8), /* toint. */
  COSTS_N_INSNS (8), /* fromint. */
  COSTS_N_INSNS (8) /* roundint. */
  COSTS_N_INSNS (64), /* div. */
  COSTS_N_INSNS (16), /* mult. */
  COSTS_N_INSNS (25), /* mult_addsub. */
  COSTS_N_INSNS (30), /* fma. */
  COSTS_N_INSNS (9), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (6), /* compare. */
  COSTS_N_INSNS (6), /* widen. */
  COSTS_N_INSNS (6), /* narrow. */
  COSTS_N_INSNS (8), /* toint. */
  COSTS_N_INSNS (8), /* fromint. */
  COSTS_N_INSNS (8) /* roundint. */
  COSTS_N_INSNS (1) /* alu. */

const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1), /* shift. */
  COSTS_N_INSNS (1), /* shift_reg. */
  COSTS_N_INSNS (1), /* arith_shift. */
  COSTS_N_INSNS (1), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (1), /* extend_arith. */
  COSTS_N_INSNS (1), /* bfi. */
  COSTS_N_INSNS (1), /* bfx. */
  COSTS_N_INSNS (1), /* clz. */
  COSTS_N_INSNS (1), /* rev. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (1), /* flag_setting. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (1), /* add. */
  COSTS_N_INSNS (1), /* extend_add. */
  COSTS_N_INSNS (7) /* idiv. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (1), /* load. */
  COSTS_N_INSNS (1), /* load_sign_extend. */
  COSTS_N_INSNS (6), /* ldrd. */
  COSTS_N_INSNS (1), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* loadf. */
  COSTS_N_INSNS (4), /* loadd. */
  COSTS_N_INSNS (1), /* load_unaligned. */
  COSTS_N_INSNS (1), /* store. */
  COSTS_N_INSNS (3), /* strd. */
  COSTS_N_INSNS (1), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* storef. */
  COSTS_N_INSNS (2), /* stored. */
  COSTS_N_INSNS (1), /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (15), /* div. */
  COSTS_N_INSNS (3), /* mult. */
  COSTS_N_INSNS (7), /* mult_addsub. */
  COSTS_N_INSNS (7), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (30), /* div. */
  COSTS_N_INSNS (6), /* mult. */
  COSTS_N_INSNS (10), /* mult_addsub. */
  COSTS_N_INSNS (7), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (1) /* alu. */

const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1), /* shift. */
  COSTS_N_INSNS (1), /* shift_reg. */
  COSTS_N_INSNS (1), /* arith_shift. */
  COSTS_N_INSNS (1), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (1), /* extend_arith. */
  COSTS_N_INSNS (1), /* bfi. */
  COSTS_N_INSNS (1), /* bfx. */
  COSTS_N_INSNS (1), /* clz. */
  COSTS_N_INSNS (1), /* rev. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (1), /* flag_setting. */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (1), /* add. */
  COSTS_N_INSNS (1), /* extend_add. */
  COSTS_N_INSNS (7) /* idiv. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (1), /* extend. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (1), /* load. */
  COSTS_N_INSNS (1), /* load_sign_extend. */
  COSTS_N_INSNS (3), /* ldrd. */
  COSTS_N_INSNS (1), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* loadf. */
  COSTS_N_INSNS (2), /* loadd. */
  COSTS_N_INSNS (1), /* load_unaligned. */
  COSTS_N_INSNS (1), /* store. */
  COSTS_N_INSNS (3), /* strd. */
  COSTS_N_INSNS (1), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* storef. */
  COSTS_N_INSNS (2), /* stored. */
  COSTS_N_INSNS (1), /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (15), /* div. */
  COSTS_N_INSNS (3), /* mult. */
  COSTS_N_INSNS (7), /* mult_addsub. */
  COSTS_N_INSNS (7), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (30), /* div. */
  COSTS_N_INSNS (6), /* mult. */
  COSTS_N_INSNS (10), /* mult_addsub. */
  COSTS_N_INSNS (7), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (3), /* fpconst. */
  COSTS_N_INSNS (3), /* neg. */
  COSTS_N_INSNS (3), /* compare. */
  COSTS_N_INSNS (3), /* widen. */
  COSTS_N_INSNS (3), /* narrow. */
  COSTS_N_INSNS (3), /* toint. */
  COSTS_N_INSNS (3), /* fromint. */
  COSTS_N_INSNS (3) /* roundint. */
  COSTS_N_INSNS (1) /* alu. */

const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1), /* shift_reg. */
  COSTS_N_INSNS (1), /* arith_shift. */
  COSTS_N_INSNS (1), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend_arith. */
  COSTS_N_INSNS (1), /* bfx. */
  COSTS_N_INSNS (1), /* clz. */
  COSTS_N_INSNS (1), /* rev. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (2), /* simple. */
  COSTS_N_INSNS (3), /* flag_setting. */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (3), /* add. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (18) /* idiv. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (3), /* extend. */
  COSTS_N_INSNS (3), /* extend_add. */
  COSTS_N_INSNS (3), /* load. */
  COSTS_N_INSNS (3), /* load_sign_extend. */
  COSTS_N_INSNS (3), /* ldrd. */
  COSTS_N_INSNS (3), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (3), /* loadf. */
  COSTS_N_INSNS (3), /* loadd. */
  0, /* load_unaligned. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* storef. */
  COSTS_N_INSNS (2), /* stored. */
  0, /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (17), /* div. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (8), /* mult_addsub. */
  COSTS_N_INSNS (8), /* fma. */
  COSTS_N_INSNS (4), /* addsub. */
  COSTS_N_INSNS (2), /* fpconst. */
  COSTS_N_INSNS (2), /* neg. */
  COSTS_N_INSNS (2), /* compare. */
  COSTS_N_INSNS (4), /* widen. */
  COSTS_N_INSNS (4), /* narrow. */
  COSTS_N_INSNS (4), /* toint. */
  COSTS_N_INSNS (4), /* fromint. */
  COSTS_N_INSNS (4) /* roundint. */
  COSTS_N_INSNS (31), /* div. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (8), /* mult_addsub. */
  COSTS_N_INSNS (8), /* fma. */
  COSTS_N_INSNS (4), /* addsub. */
  COSTS_N_INSNS (2), /* fpconst. */
  COSTS_N_INSNS (2), /* neg. */
  COSTS_N_INSNS (2), /* compare. */
  COSTS_N_INSNS (4), /* widen. */
  COSTS_N_INSNS (4), /* narrow. */
  COSTS_N_INSNS (4), /* toint. */
  COSTS_N_INSNS (4), /* fromint. */
  COSTS_N_INSNS (4) /* roundint. */
  COSTS_N_INSNS (1) /* alu. */

const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1), /* arith_shift. */
  COSTS_N_INSNS (1), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend_arith. */
  COSTS_N_INSNS (1), /* bfi. */
  true /* non_exec_costs_exec. */
  COSTS_N_INSNS (2), /* simple. */
  COSTS_N_INSNS (3), /* flag_setting. */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (2), /* add. */
  COSTS_N_INSNS (2), /* extend_add. */
  COSTS_N_INSNS (18) /* idiv. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (3), /* extend. */
  COSTS_N_INSNS (3), /* extend_add. */
  COSTS_N_INSNS (3), /* load. */
  COSTS_N_INSNS (3), /* load_sign_extend. */
  COSTS_N_INSNS (3), /* ldrd. */
  COSTS_N_INSNS (4), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  2, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (4), /* loadf. */
  COSTS_N_INSNS (4), /* loadd. */
  0, /* load_unaligned. */
  COSTS_N_INSNS (1), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  2, /* stm_regs_per_insn_subsequent. */
  0, /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (17), /* div. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (8), /* mult_addsub. */
  COSTS_N_INSNS (8), /* fma. */
  COSTS_N_INSNS (4), /* addsub. */
  COSTS_N_INSNS (2), /* fpconst. */
  COSTS_N_INSNS (2), /* neg. */
  COSTS_N_INSNS (5), /* compare. */
  COSTS_N_INSNS (4), /* widen. */
  COSTS_N_INSNS (4), /* narrow. */
  COSTS_N_INSNS (4), /* toint. */
  COSTS_N_INSNS (4), /* fromint. */
  COSTS_N_INSNS (4) /* roundint. */
  COSTS_N_INSNS (31), /* div. */
  COSTS_N_INSNS (4), /* mult. */
  COSTS_N_INSNS (8), /* mult_addsub. */
  COSTS_N_INSNS (8), /* fma. */
  COSTS_N_INSNS (4), /* addsub. */
  COSTS_N_INSNS (2), /* fpconst. */
  COSTS_N_INSNS (2), /* neg. */
  COSTS_N_INSNS (2), /* compare. */
  COSTS_N_INSNS (4), /* widen. */
  COSTS_N_INSNS (4), /* narrow. */
  COSTS_N_INSNS (4), /* toint. */
  COSTS_N_INSNS (4), /* fromint. */
  COSTS_N_INSNS (4) /* roundint. */
  COSTS_N_INSNS (1) /* alu. */

const struct cpu_cost_table v7m_extra_costs =
  0, /* arith_shift. */
  COSTS_N_INSNS (1), /* arith_shift_reg. */
  COSTS_N_INSNS (1), /* log_shift_reg. */
  COSTS_N_INSNS (1), /* extend_arith. */
  COSTS_N_INSNS (1), /* non_exec. */
  false /* non_exec_costs_exec. */
  COSTS_N_INSNS (1), /* simple. */
  COSTS_N_INSNS (1), /* flag_setting. */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (1), /* add. */
  COSTS_N_INSNS (3), /* extend_add. */
  COSTS_N_INSNS (8) /* idiv. */
  0, /* simple (N/A). */
  0, /* flag_setting (N/A). */
  COSTS_N_INSNS (2), /* extend. */
  COSTS_N_INSNS (3), /* extend_add. */
  COSTS_N_INSNS (2), /* load. */
  0, /* load_sign_extend. */
  COSTS_N_INSNS (3), /* ldrd. */
  COSTS_N_INSNS (2), /* ldm_1st. */
  1, /* ldm_regs_per_insn_1st. */
  1, /* ldm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* loadf. */
  COSTS_N_INSNS (3), /* loadd. */
  COSTS_N_INSNS (1), /* load_unaligned. */
  COSTS_N_INSNS (2), /* store. */
  COSTS_N_INSNS (3), /* strd. */
  COSTS_N_INSNS (2), /* stm_1st. */
  1, /* stm_regs_per_insn_1st. */
  1, /* stm_regs_per_insn_subsequent. */
  COSTS_N_INSNS (2), /* storef. */
  COSTS_N_INSNS (3), /* stored. */
  COSTS_N_INSNS (1), /* store_unaligned. */
  COSTS_N_INSNS (1), /* loadv. */
  COSTS_N_INSNS (1) /* storev. */
  COSTS_N_INSNS (7), /* div. */
  COSTS_N_INSNS (2), /* mult. */
  COSTS_N_INSNS (5), /* mult_addsub. */
  COSTS_N_INSNS (3), /* fma. */
  COSTS_N_INSNS (1), /* addsub. */
  COSTS_N_INSNS (15), /* div. */
  COSTS_N_INSNS (5), /* mult. */
  COSTS_N_INSNS (7), /* mult_addsub. */
  COSTS_N_INSNS (7), /* fma. */
  COSTS_N_INSNS (3), /* addsub. */
  COSTS_N_INSNS (1) /* alu. */
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs, /* Insn extra costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  1, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs, /* Insn extra costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  1, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */
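/* Hence "Max cond insns" is 3 in the table below, versus 5 in the generic
   tunings above.  */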
1794 const struct tune_params arm_strongarm_tune
=
1796 &generic_extra_costs
, /* Insn extra costs. */
1797 NULL
, /* Sched adj cost. */
1798 arm_default_branch_cost
,
1799 &arm_default_vec_cost
,
1800 1, /* Constant limit. */
1801 3, /* Max cond insns. */
1802 8, /* Memset max inline. */
1803 1, /* Issue rate. */
1804 ARM_PREFETCH_NOT_BENEFICIAL
,
1805 tune_params::PREF_CONST_POOL_TRUE
,
1806 tune_params::PREF_LDRD_FALSE
,
1807 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1808 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1809 tune_params::DISPARAGE_FLAGS_NEITHER
,
1810 tune_params::PREF_NEON_64_FALSE
,
1811 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1812 tune_params::FUSE_NOTHING
,
1813 tune_params::SCHED_AUTOPREF_OFF
1816 const struct tune_params arm_xscale_tune
=
1818 &generic_extra_costs
, /* Insn extra costs. */
1819 xscale_sched_adjust_cost
,
1820 arm_default_branch_cost
,
1821 &arm_default_vec_cost
,
1822 2, /* Constant limit. */
1823 3, /* Max cond insns. */
1824 8, /* Memset max inline. */
1825 1, /* Issue rate. */
1826 ARM_PREFETCH_NOT_BENEFICIAL
,
1827 tune_params::PREF_CONST_POOL_TRUE
,
1828 tune_params::PREF_LDRD_FALSE
,
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1830 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1831 tune_params::DISPARAGE_FLAGS_NEITHER
,
1832 tune_params::PREF_NEON_64_FALSE
,
1833 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1834 tune_params::FUSE_NOTHING
,
1835 tune_params::SCHED_AUTOPREF_OFF
const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};
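/* FUSE_OPS marks instruction pairs that the scheduler should try to keep
   adjacent so the core can fuse them: MOVW/MOVT pairs that build a 32-bit
   immediate, and AES/AESMC pairs used by the crypto extensions.  */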
const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  3,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  32,					/* Memset max inline.  */
  4,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
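/* A note on the prefetch entry above: ARM_PREFETCH_BENEFICIAL (4, 32, 32)
   enables issuing of software prefetches; the three arguments are taken
   here to be the number of prefetch slots and the L1 data cache size and
   line size fed to the prefetching heuristics.  */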
const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};
/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  2,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,					/* Constant limit.  */
  1,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m1.small-multiply.  */

const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,		/* Vectorizer costs.  */
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  1,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,					/* Constant limit.  */
  5,					/* Max cond insns.  */
  8,					/* Memset max inline.  */
  2,					/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_64_FALSE,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};
/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
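/* For example, when compiling for an ARMv7-A architecture the macro
   written into this buffer becomes __ARM_ARCH_7A__.  */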
/* Supported TLS relocations.  */

  TLS_DESCSEQ	/* GNU scheme */
/* The maximum number of insns to be used when loading a constant.  */
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
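/* For instance, emit_set_insn (some_reg, GEN_INT (42)) would append the
   insn (set (some_reg) (const_int 42)) to the current sequence; the
   operands here are purely illustrative.  */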
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;	/* Clear the least-significant set bit.  */
    }

  return count;
}
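/* The loop relies on the identity that value & (value - 1) clears the
   lowest set bit; e.g. for 0b101000 two iterations (0b101000 -> 0b100000
   -> 0) give a count of 2.  */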
/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;

  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;
/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}

static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}
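/* By way of illustration (names assumed from the format strings above): for
   the "qq" mode the arithmetic helper registers "__gnu_addqq3", while the
   conversion helper builds names such as "__gnu_fractqqhq2", the trailing
   "2" appearing only when source and destination are both fixed-point with
   matching signedness and fract/accum kind, as the condition above checks.  */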
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
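/* ne_optab is deliberately left NULL: the run-time ABI defines no
   "compare not equal" helper, so the compiler synthesizes != by inverting
   the result of __aeabi_dcmpeq (and likewise for the single-precision
   table below).  */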
  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
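/* The net effect for SImode is that a plain division such as a / b expands
   to a call to __aeabi_idiv (or __aeabi_uidiv), while a remainder a % b is
   obtained through the combined __aeabi_idivmod entry points, since no
   separate mod libcall is registered.  */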
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
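  /* On cores without native half-precision arithmetic this means, for
     example, that an HFmode addition is carried out by widening both
     operands (with __gnu_h2f_ieee for the IEEE format), adding in SFmode,
     and narrowing the result back with __gnu_f2h_ieee.  */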
  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  const arm_fixed_mode_set fixed_arith_modes[] =
    {
      { E_UQQmode, "uqq" },
      { E_UHQmode, "uhq" },
      { E_USQmode, "usq" },
      { E_UDQmode, "udq" },
      { E_UTQmode, "utq" },
      { E_UHAmode, "uha" },
      { E_USAmode, "usa" },
      { E_UDAmode, "uda" },
      { E_UTAmode, "uta" }
    };
  const arm_fixed_mode_set fixed_conv_modes[] =
    {
      { E_UQQmode, "uqq" },
      { E_UHQmode, "uhq" },
      { E_USQmode, "usq" },
      { E_UDQmode, "udq" },
      { E_UTQmode, "utq" },
      { E_UHAmode, "uha" },
      { E_USAmode, "usa" },
      { E_UDAmode, "uda" },
      { E_UTAmode, "uta" },
    };
  for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
    {
      arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				   "add", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				   "ssadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				   "usadd", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				   "sub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				   "sssub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				   "ussub", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				   "mul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				   "ssmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				   "usmul", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				   "div", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				   "udiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				   "ssdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				   "usdiv", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				   "neg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				   "ssneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				   "usneg", fixed_arith_modes[i].name, 2);
      arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				   "ashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				   "ashr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				   "lshr", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				   "ssashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				   "usashl", fixed_arith_modes[i].name, 3);
      arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				   "cmp", fixed_arith_modes[i].name, 2);
    }
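  /* The final argument above selects the numeric suffix of the generated
     name, mirroring the usual libgcc convention: the two-input arithmetic
     helpers get a "3" (e.g. "__gnu_addqq3") while negation and comparison
     get a "2" (e.g. "__gnu_cmpqq2").  */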
  for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
    for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
      {
	if (i == j
	    || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		&& !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	  continue;

	arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfract_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfract",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (fractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "fractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
	arm_set_fixed_conv_libfunc (satfractuns_optab,
				    fixed_conv_modes[i].mode,
				    fixed_conv_modes[j].mode, "satfractuns",
				    fixed_conv_modes[i].name,
				    fixed_conv_modes[j].name);
      }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type.
     The C Library ABI further reinforces this definition in \S 4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
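/* In C terms, the type built above matches the AAPCS definition

     struct __va_list { void *__ap; };

   and __builtin_va_list is a typedef for it on AAPCS targets.  */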
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  /* We only support -mpure-code and -mslow-flash-data on M-profile targets
     with MOVT.  */
  if ((target_pure_code || target_slow_flash_data)
      && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
			  "-mslow-flash-data");
      error ("%s only supports non-pic code on M-profile targets with the "
	     "MOVT instruction", flag);
    }
}
/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;

      /* For THUMB2, we limit the conditional sequence to one IT block.  */
      if (TARGET_THUMB2)
	max_insns_skipped = arm_restrict_it ? 1 : 4;
    }
  else
    /* When -mrestrict-it is in use tone down the if-conversion.  */
    max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
      ? 1 : current_tune->max_insns_skipped;
}
/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  if (opts->x_align_functions <= 0)
    opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
      && opts->x_optimize_size ? 2 : 4;
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}

/* Implement TARGET_OPTION_SAVE.  */
static void
arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_arm_arch_string = opts->x_arm_arch_string;
  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
  ptr->x_arm_tune_string = opts->x_arm_tune_string;
}

/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_arm_arch_string = ptr->x_arm_arch_string;
  opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
  opts->x_arm_tune_string = ptr->x_arm_tune_string;
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
			      false);
}
2932 /* Reset options between modes that the user has specified. */
2934 arm_option_override_internal (struct gcc_options
*opts
,
2935 struct gcc_options
*opts_set
)
2937 arm_override_options_after_change_1 (opts
);
2939 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2941 /* The default is to enable interworking, so this warning message would
2942 be confusing to users who have just compiled with, eg, -march=armv3. */
2943 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2944 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2947 if (TARGET_THUMB_P (opts
->x_target_flags
)
2948 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2950 warning (0, "target CPU does not support THUMB instructions");
2951 opts
->x_target_flags
&= ~MASK_THUMB
;
2954 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2956 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2957 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2960 /* Callee super interworking implies thumb interworking. Adding
2961 this to the flags here simplifies the logic elsewhere. */
2962 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2963 opts
->x_target_flags
|= MASK_INTERWORK
;
2965 /* need to remember initial values so combinaisons of options like
2966 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2967 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2969 if (! opts_set
->x_arm_restrict_it
)
2970 opts
->x_arm_restrict_it
= arm_arch8
;
2972 /* ARM execution state and M profile don't have [restrict] IT. */
2973 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
2974 opts
->x_arm_restrict_it
= 0;
2976 /* Enable -munaligned-access by default for
2977 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2978 i.e. Thumb2 and ARM state only.
2979 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2980 - ARMv8 architecture-base processors.
2982 Disable -munaligned-access by default for
2983 - all pre-ARMv6 architecture-based processors
2984 - ARMv6-M architecture-based processors
2985 - ARMv8-M Baseline processors. */
2987 if (! opts_set
->x_unaligned_access
)
2989 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
2990 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
2992 else if (opts
->x_unaligned_access
== 1
2993 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2995 warning (0, "target CPU does not support unaligned accesses");
2996 opts
->x_unaligned_access
= 0;
2999 /* Don't warn since it's on by default in -O2. */
3000 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3001 opts
->x_flag_schedule_insns
= 0;
3003 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3005 /* Disable shrink-wrap when optimizing function for size, since it tends to
3006 generate additional returns. */
3007 if (optimize_function_for_size_p (cfun
)
3008 && TARGET_THUMB2_P (opts
->x_target_flags
))
3009 opts
->x_flag_shrink_wrap
= false;
3011 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3013 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3014 - epilogue_insns - does not accurately model the corresponding insns
3015 emitted in the asm file. In particular, see the comment in thumb_exit
3016 'Find out how many of the (return) argument registers we can corrupt'.
3017 As a consequence, the epilogue may clobber registers without fipa-ra
3018 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3019 TODO: Accurately model clobbers for epilogue_insns and reenable
3021 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3022 opts
->x_flag_ipa_ra
= 0;
3024 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3026 /* Thumb2 inline assembly code should always use unified syntax.
3027 This will apply to ARM and Thumb1 eventually. */
3028 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3030 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3031 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3035 static sbitmap isa_all_fpubits
;
3036 static sbitmap isa_quirkbits
;
3038 /* Configure a build target TARGET from the user-specified options OPTS and
3039 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3040 architecture have been specified, but the two are not identical. */
3042 arm_configure_build_target (struct arm_build_target
*target
,
3043 struct cl_target_option
*opts
,
3044 struct gcc_options
*opts_set
,
3045 bool warn_compatible
)
3047 const cpu_option
*arm_selected_tune
= NULL
;
3048 const arch_option
*arm_selected_arch
= NULL
;
3049 const cpu_option
*arm_selected_cpu
= NULL
;
3050 const arm_fpu_desc
*arm_selected_fpu
= NULL
;
3051 const char *tune_opts
= NULL
;
3052 const char *arch_opts
= NULL
;
3053 const char *cpu_opts
= NULL
;
3055 bitmap_clear (target
->isa
);
3056 target
->core_name
= NULL
;
3057 target
->arch_name
= NULL
;
3059 if (opts_set
->x_arm_arch_string
)
3061 arm_selected_arch
= arm_parse_arch_option_name (all_architectures
,
3063 opts
->x_arm_arch_string
);
3064 arch_opts
= strchr (opts
->x_arm_arch_string
, '+');
3067 if (opts_set
->x_arm_cpu_string
)
3069 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "-mcpu",
3070 opts
->x_arm_cpu_string
);
3071 cpu_opts
= strchr (opts
->x_arm_cpu_string
, '+');
3072 arm_selected_tune
= arm_selected_cpu
;
3073 /* If taking the tuning from -mcpu, we don't need to rescan the
3074 options for tuning. */
3077 if (opts_set
->x_arm_tune_string
)
3079 arm_selected_tune
= arm_parse_cpu_option_name (all_cores
, "-mtune",
3080 opts
->x_arm_tune_string
);
3081 tune_opts
= strchr (opts
->x_arm_tune_string
, '+');
3084 if (arm_selected_arch
)
3086 arm_initialize_isa (target
->isa
, arm_selected_arch
->common
.isa_bits
);
3087 arm_parse_option_features (target
->isa
, &arm_selected_arch
->common
,
3090 if (arm_selected_cpu
)
3092 auto_sbitmap
cpu_isa (isa_num_bits
);
3093 auto_sbitmap
isa_delta (isa_num_bits
);
3095 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->common
.isa_bits
);
3096 arm_parse_option_features (cpu_isa
, &arm_selected_cpu
->common
,
3098 bitmap_xor (isa_delta
, cpu_isa
, target
->isa
);
3099 /* Ignore any bits that are quirk bits. */
3100 bitmap_and_compl (isa_delta
, isa_delta
, isa_quirkbits
);
3101 /* Ignore (for now) any bits that might be set by -mfpu. */
3102 bitmap_and_compl (isa_delta
, isa_delta
, isa_all_fpubits
);
3104 if (!bitmap_empty_p (isa_delta
))
3106 if (warn_compatible
)
3107 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3108 arm_selected_cpu
->common
.name
,
3109 arm_selected_arch
->common
.name
);
3110 /* -march wins for code generation.
3111 -mcpu wins for default tuning. */
3112 if (!arm_selected_tune
)
3113 arm_selected_tune
= arm_selected_cpu
;
3115 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3116 target
->arch_name
= arm_selected_arch
->common
.name
;
3120 /* Architecture and CPU are essentially the same.
3121 Prefer the CPU setting. */
3122 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3123 target
->core_name
= arm_selected_cpu
->common
.name
;
3124 /* Copy the CPU's capabilities, so that we inherit the
3125 appropriate extensions and quirks. */
3126 bitmap_copy (target
->isa
, cpu_isa
);
3131 /* Pick a CPU based on the architecture. */
3132 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3133 target
->arch_name
= arm_selected_arch
->common
.name
;
3134 /* Note: target->core_name is left unset in this path. */
3137 else if (arm_selected_cpu
)
3139 target
->core_name
= arm_selected_cpu
->common
.name
;
3140 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3141 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3143 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3145 /* If the user did not specify a processor or architecture, choose
3149 const cpu_option
*sel
;
3150 auto_sbitmap
sought_isa (isa_num_bits
);
3151 bitmap_clear (sought_isa
);
3152 auto_sbitmap
default_isa (isa_num_bits
);
3154 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "default CPU",
3155 TARGET_CPU_DEFAULT
);
3156 cpu_opts
= strchr (TARGET_CPU_DEFAULT
, '+');
3157 gcc_assert (arm_selected_cpu
->common
.name
);
3159 /* RWE: All of the selection logic below (to the end of this
3160 'if' clause) looks somewhat suspect. It appears to be mostly
3161 there to support forcing thumb support when the default CPU
3162 does not have thumb (somewhat dubious in terms of what the
3163 user might be expecting). I think it should be removed once
3164 support for the pre-thumb era cores is removed. */
3165 sel
= arm_selected_cpu
;
3166 arm_initialize_isa (default_isa
, sel
->common
.isa_bits
);
3167 arm_parse_option_features (default_isa
, &arm_selected_cpu
->common
,
3170 /* Now check to see if the user has specified any command line
3171 switches that require certain abilities from the cpu. */
3173 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3175 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3176 bitmap_set_bit (sought_isa
, isa_bit_mode32
);
3178 /* There are no ARM processors that support both APCS-26 and
3179 interworking. Therefore we forcibly remove MODE26 from
3180 from the isa features here (if it was set), so that the
3181 search below will always be able to find a compatible
3183 bitmap_clear_bit (default_isa
, isa_bit_mode26
);
3186 /* If there are such requirements and the default CPU does not
3187 satisfy them, we need to run over the complete list of
3188 cores looking for one that is satisfactory. */
3189 if (!bitmap_empty_p (sought_isa
)
3190 && !bitmap_subset_p (sought_isa
, default_isa
))
3192 auto_sbitmap
candidate_isa (isa_num_bits
);
3193 /* We're only interested in a CPU with at least the
3194 capabilities of the default CPU and the required
3195 additional features. */
3196 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3198 /* Try to locate a CPU type that supports all of the abilities
3199 of the default CPU, plus the extra abilities requested by
3201 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3203 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3204 /* An exact match? */
3205 if (bitmap_equal_p (default_isa
, candidate_isa
))
3209 if (sel
->common
.name
== NULL
)
3211 unsigned current_bit_count
= isa_num_bits
;
3212 const cpu_option
*best_fit
= NULL
;
3214 /* Ideally we would like to issue an error message here
3215 saying that it was not possible to find a CPU compatible
3216 with the default CPU, but which also supports the command
3217 line options specified by the programmer, and so they
3218 ought to use the -mcpu=<name> command line option to
3219 override the default CPU type.
3221 If we cannot find a CPU that has exactly the
3222 characteristics of the default CPU and the given
3223 command line options we scan the array again looking
3224 for a best match. The best match must have at least
3225 the capabilities of the perfect match. */
3226 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3228 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3230 if (bitmap_subset_p (default_isa
, candidate_isa
))
3234 bitmap_and_compl (candidate_isa
, candidate_isa
,
3236 count
= bitmap_popcount (candidate_isa
);
3238 if (count
< current_bit_count
)
3241 current_bit_count
= count
;
3245 gcc_assert (best_fit
);
3249 arm_selected_cpu
= sel
;
3252 /* Now we know the CPU, we can finally initialize the target
3254 target
->core_name
= arm_selected_cpu
->common
.name
;
3255 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3256 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3258 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3261 gcc_assert (arm_selected_cpu
);
3262 gcc_assert (arm_selected_arch
);
3264 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
)
3266 arm_selected_fpu
= &all_fpus
[opts
->x_arm_fpu_index
];
3267 auto_sbitmap
fpu_bits (isa_num_bits
);
3269 arm_initialize_isa (fpu_bits
, arm_selected_fpu
->isa_bits
);
3270 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpubits
);
3271 bitmap_ior (target
->isa
, target
->isa
, fpu_bits
);
3274 if (!arm_selected_tune
)
3275 arm_selected_tune
= arm_selected_cpu
;
3276 else /* Validate the features passed to -mtune. */
3277 arm_parse_option_features (NULL
, &arm_selected_tune
->common
, tune_opts
);
3279 const cpu_tune
*tune_data
= &all_tunes
[arm_selected_tune
- all_cores
];
3281 /* Finish initializing the target structure. */
3282 target
->arch_pp_name
= arm_selected_arch
->arch
;
3283 target
->base_arch
= arm_selected_arch
->base_arch
;
3284 target
->profile
= arm_selected_arch
->profile
;
3286 target
->tune_flags
= tune_data
->tune_flags
;
3287 target
->tune
= tune_data
->tune
;
3288 target
->tune_core
= tune_data
->scheduler
;
3291 /* Fix up any incompatible options that the user has specified. */
3293 arm_option_override (void)
3295 static const enum isa_feature fpu_bitlist
[]
3296 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
3297 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3298 cl_target_option opts
;
3300 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3301 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3303 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3304 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3306 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3308 if (!global_options_set
.x_arm_fpu_index
)
3313 ok
= opt_enum_arg_to_value (OPT_mfpu_
, FPUTYPE_AUTO
, &fpu_index
,
3316 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3319 cl_target_option_save (&opts
, &global_options
);
3320 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3323 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3324 SUBTARGET_OVERRIDE_OPTIONS
;
3327 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3328 arm_base_arch
= arm_active_target
.base_arch
;
3330 arm_tune
= arm_active_target
.tune_core
;
3331 tune_flags
= arm_active_target
.tune_flags
;
3332 current_tune
= arm_active_target
.tune
;
3334 /* TBD: Dwarf info for apcs frame is not handled yet. */
3335 if (TARGET_APCS_FRAME
)
3336 flag_shrink_wrap
= false;
3338 /* BPABI targets use linker tricks to allow interworking on cores
3339 without thumb support. */
3340 if (TARGET_INTERWORK
3342 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3344 warning (0, "target CPU does not support interworking" );
3345 target_flags
&= ~MASK_INTERWORK
;
3348 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3350 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3351 target_flags
|= MASK_APCS_FRAME
;
3354 if (TARGET_POKE_FUNCTION_NAME
)
3355 target_flags
|= MASK_APCS_FRAME
;
3357 if (TARGET_APCS_REENT
&& flag_pic
)
3358 error ("-fpic and -mapcs-reent are incompatible");
3360 if (TARGET_APCS_REENT
)
3361 warning (0, "APCS reentrant code not supported. Ignored");
  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv3m);
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5);
  arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv5e);
  arm_arch5te = arm_arch5e
    && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_ARMv8_2);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
== ARM_FP16_FORMAT_ALTERNATIVE
)
3395 error ("selected fp16 options are incompatible");
3396 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3400 /* Set up some tuning parameters. */
3401 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3402 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3403 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3404 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3405 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3406 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3408 /* And finally, set up some quirks. */
3409 arm_arch_no_volatile_ce
3410 = bitmap_bit_p (arm_active_target
.isa
, isa_quirk_no_volatile_ce
);
3412 = arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
, isa_quirk_ARMv6kz
);
3414 /* V5 code we generate is completely interworking capable, so we turn off
3415 TARGET_INTERWORK here to avoid many tests later on. */
3417 /* XXX However, we must pass the right pre-processor defines to CPP
3418 or GLD can get confused. This is a hack. */
3419 if (TARGET_INTERWORK
)
3420 arm_cpp_interwork
= 1;
3423 target_flags
&= ~MASK_INTERWORK
;
3425 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3426 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3428 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3429 error ("iwmmxt abi requires an iwmmxt capable cpu");
3431 /* If soft-float is specified then don't use FPU. */
3432 if (TARGET_SOFT_FLOAT
)
3433 arm_fpu_attr
= FPU_NONE
;
3435 arm_fpu_attr
= FPU_VFP
;
3437 if (TARGET_AAPCS_BASED
)
3439 if (TARGET_CALLER_INTERWORKING
)
3440 error ("AAPCS does not support -mcaller-super-interworking");
3442 if (TARGET_CALLEE_INTERWORKING
)
3443 error ("AAPCS does not support -mcallee-super-interworking");
3446 /* __fp16 support currently assumes the core has ldrh. */
3447 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3448 sorry ("__fp16 and no ldrh");
3450 if (TARGET_AAPCS_BASED
)
3452 if (arm_abi
== ARM_ABI_IWMMXT
)
3453 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3454 else if (TARGET_HARD_FLOAT_ABI
)
3456 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3457 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_VFPv2
))
3458 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3461 arm_pcs_default
= ARM_PCS_AAPCS
;
3465 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3466 sorry ("-mfloat-abi=hard and VFP");
3468 if (arm_abi
== ARM_ABI_APCS
)
3469 arm_pcs_default
= ARM_PCS_APCS
;
3471 arm_pcs_default
= ARM_PCS_ATPCS
;
  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }
  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;
  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  /* Hoisting PIC address calculations more aggressively provides a small,
     but measurable, size reduction for PIC code.  Therefore, we decrease
     the bar for unrestricted expression hoisting to the cost of PIC address
     calculation, which is 2 instructions.  */
  maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;
  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   current_tune->prefetch.num_slots,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   current_tune->prefetch.l1_cache_line_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  if (current_tune->prefetch.l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   current_tune->prefetch.l1_cache_size,
			   global_options.x_param_values,
			   global_options_set.x_param_values);
  /* Use Neon to perform 64-bits operations rather than core
     registers.  */
  prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
  if (use_neon_for_64bits == 1)
    prefer_neon_for_64bits = true;

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int param_sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      param_sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      param_sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      param_sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 param_sched_autopref_queue_depth,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
	  || !(flag_unwind_tables
	       || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
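/* A guarded sketch (not part of the build) of the ARM-mode trampoline
   layout that the two routines above produce: the first two words hold
   the fixed ldr instructions from arm_asm_trampoline_template, and
   arm_trampoline_init patches the static chain value into offset 8 and
   the target address into offset 12 (offsets 12/16 for the 16-bit Thumb
   variant).  The demo_* names are invented for illustration.  */
#if 0
#include <stdint.h>

/* Byte offsets within the 32-bit (ARM-mode) trampoline block.  */
enum
{
  DEMO_TRAMP_INSN0  = 0,   /* ldr r8, [pc, #0]   (fixed)    */
  DEMO_TRAMP_INSN1  = 4,   /* ldr pc, [pc]       (fixed)    */
  DEMO_TRAMP_CHAIN  = 8,   /* static chain value (patched)  */
  DEMO_TRAMP_FNADDR = 12   /* function address   (patched)  */
};

/* Mirror of the per-instance patching step: only the last two words
   are written here; the code words come from the template.  */
static void
demo_patch_trampoline (uint32_t *tramp, uint32_t chain, uint32_t fnaddr)
{
  tramp[DEMO_TRAMP_CHAIN / 4] = chain;
  tramp[DEMO_TRAMP_FNADDR / 4] = fnaddr;
  /* A real implementation must also flush the instruction cache over
     the block, which is what the __clear_cache library call above does.  */
}
#endif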
/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
	  && arm_compute_static_chain_stack_bytes() != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  */
  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
	return 0;

  return 1;
}
/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	  && ((i & ~0xc000003f) == 0
	      || (i & ~0xf000000f) == 0
	      || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}
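/* For reference, the guarded sketch below re-implements the core ARM-mode
   rule that const_ok_for_arm applies: a 32-bit value is a valid
   data-processing immediate when it is an 8-bit value rotated right by an
   even amount.  It is illustrative only, uses invented demo_* names, and
   ignores the Thumb-2 and MOVT cases handled above.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

/* Rotate a 32-bit value right by N bits (0 < n < 32, or n == 0).  */
static uint32_t
demo_ror32 (uint32_t x, unsigned n)
{
  return n == 0 ? x : (x >> n) | (x << (32 - n));
}

/* True if X can be encoded as an ARM-mode immediate: an 8-bit constant
   rotated right by an even amount (0, 2, ..., 30).  */
static bool
demo_const_ok_for_arm (uint32_t x)
{
  for (unsigned rot = 0; rot < 32; rot += 2)
    if ((demo_ror32 (x, rot) & ~UINT32_C (0xff)) == 0)
      return true;
  return false;
}

/* Examples: 0xff, 0x104, 0xff000000 and 0xf000000f are encodable;
   0x101 and 0xfff are not.  */
#endif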
4135 /* Return true if I is a valid constant for the operation CODE. */
4137 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
4139 if (const_ok_for_arm (i
))
4145 /* See if we can use movw. */
4146 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4149 /* Otherwise, try mvn. */
4150 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4153 /* See if we can use addw or subw. */
4155 && ((i
& 0xfffff000) == 0
4156 || ((-i
) & 0xfffff000) == 0))
4177 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4179 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4185 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4189 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4196 /* Return true if I is a valid di mode constant for the operation CODE. */
4198 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4200 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4201 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4202 rtx hi
= GEN_INT (hi_val
);
4203 rtx lo
= GEN_INT (lo_val
);
4213 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4214 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4216 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4223 /* Emit a sequence of insns to handle a large constant.
4224 CODE is the code of the operation required, it can be any of SET, PLUS,
4225 IOR, AND, XOR, MINUS;
4226 MODE is the mode in which the operation is being performed;
4227 VAL is the integer to operate on;
4228 SOURCE is the other operand (a register, or a null-pointer for SET);
4229 SUBTARGETS means it is safe to create scratch registers if that will
4230 either produce a simpler sequence, or we will want to cse the values.
4231 Return value is the number of insns emitted. */
4233 /* ??? Tweak this for thumb2. */
4235 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4236 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4240 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4241 cond
= COND_EXEC_TEST (PATTERN (insn
));
4245 if (subtargets
|| code
== SET
4246 || (REG_P (target
) && REG_P (source
)
4247 && REGNO (target
) != REGNO (source
)))
4249 /* After arm_reorg has been called, we can't fix up expensive
4250 constants by pushing them into memory so we must synthesize
4251 them in-line, regardless of the cost. This is only likely to
4252 be more costly on chips that have load delay slots and we are
4253 compiling without running the scheduler (so no splitting
4254 occurred before the final instruction emission).
4256 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4258 if (!cfun
->machine
->after_arm_reorg
4260 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4262 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4267 /* Currently SET is the only monadic value for CODE, all
4268 the rest are diadic. */
4269 if (TARGET_USE_MOVT
)
4270 arm_emit_movpair (target
, GEN_INT (val
));
4272 emit_set_insn (target
, GEN_INT (val
));
4278 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4280 if (TARGET_USE_MOVT
)
4281 arm_emit_movpair (temp
, GEN_INT (val
));
4283 emit_set_insn (temp
, GEN_INT (val
));
4285 /* For MINUS, the value is subtracted from, since we never
4286 have subtraction of a constant. */
4288 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4290 emit_set_insn (target
,
4291 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4297 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4301 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4302 ARM/THUMB2 immediates, and add up to VAL.
4303 Thr function return value gives the number of insns required. */
4305 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4306 struct four_ints
*return_sequence
)
4308 int best_consecutive_zeros
= 0;
4312 struct four_ints tmp_sequence
;
4314 /* If we aren't targeting ARM, the best place to start is always at
4315 the bottom, otherwise look more closely. */
4318 for (i
= 0; i
< 32; i
+= 2)
4320 int consecutive_zeros
= 0;
4322 if (!(val
& (3 << i
)))
4324 while ((i
< 32) && !(val
& (3 << i
)))
4326 consecutive_zeros
+= 2;
4329 if (consecutive_zeros
> best_consecutive_zeros
)
4331 best_consecutive_zeros
= consecutive_zeros
;
4332 best_start
= i
- consecutive_zeros
;
4339 /* So long as it won't require any more insns to do so, it's
4340 desirable to emit a small constant (in bits 0...9) in the last
4341 insn. This way there is more chance that it can be combined with
4342 a later addressing insn to form a pre-indexed load or store
4343 operation. Consider:
4345 *((volatile int *)0xe0000100) = 1;
4346 *((volatile int *)0xe0000110) = 2;
4348 We want this to wind up as:
4352 str rB, [rA, #0x100]
4354 str rB, [rA, #0x110]
4356 rather than having to synthesize both large constants from scratch.
4358 Therefore, we calculate how many insns would be required to emit
4359 the constant starting from `best_start', and also starting from
4360 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4361 yield a shorter sequence, we may as well use zero. */
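/* A worked instance (guarded out of the build) of the heuristic described
   in the comment above: synthesising 0xe0000100 as 0xe0000000 + 0x100
   leaves a small final addend, so a following store to 0xe0000110 can
   reuse the same base register and express the difference as an
   addressing-mode offset.  The function below only demonstrates the
   arithmetic split; instruction selection and combination happen
   elsewhere.  The demo_* names are invented.  */
#if 0
#include <stdint.h>

/* Split VAL into a "big" part, materialised on its own, plus a small
   low-order remainder that can later fold into a load/store offset.
   The split point here is simply the low 12 bits, mirroring the
   0xe0000100 / 0xe0000110 example in the comment above.  */
static void
demo_split_for_offset (uint32_t val, uint32_t *base, uint32_t *offset)
{
  *offset = val & 0xfff;		/* e.g. 0x100 */
  *base = val & ~UINT32_C (0xfff);	/* e.g. 0xe0000000 */
}

/* With base = 0xe0000000 in a register rA, the two stores in the comment
   become  str rB, [rA, #0x100]  and  str rB, [rA, #0x110],  instead of
   materialising both full addresses from scratch.  */
#endif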
4362 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4364 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4366 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4367 if (insns2
<= insns1
)
4369 *return_sequence
= tmp_sequence
;
4377 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4379 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4380 struct four_ints
*return_sequence
, int i
)
4382 int remainder
= val
& 0xffffffff;
4385 /* Try and find a way of doing the job in either two or three
4388 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4389 location. We start at position I. This may be the MSB, or
4390 optimial_immediate_sequence may have positioned it at the largest block
4391 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4392 wrapping around to the top of the word when we drop off the bottom.
4393 In the worst case this code should produce no more than four insns.
4395 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4396 constants, shifted to any arbitrary location. We should always start
4401 unsigned int b1
, b2
, b3
, b4
;
4402 unsigned HOST_WIDE_INT result
;
4405 gcc_assert (insns
< 4);
4410 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4411 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4414 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4415 /* We can use addw/subw for the last 12 bits. */
4419 /* Use an 8-bit shifted/rotated immediate. */
4423 result
= remainder
& ((0x0ff << end
)
4424 | ((i
< end
) ? (0xff >> (32 - end
))
4431 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4432 arbitrary shifts. */
4433 i
-= TARGET_ARM
? 2 : 1;
4437 /* Next, see if we can do a better job with a thumb2 replicated
4440 We do it this way around to catch the cases like 0x01F001E0 where
4441 two 8-bit immediates would work, but a replicated constant would
4444 TODO: 16-bit constants that don't clear all the bits, but still win.
4445 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4448 b1
= (remainder
& 0xff000000) >> 24;
4449 b2
= (remainder
& 0x00ff0000) >> 16;
4450 b3
= (remainder
& 0x0000ff00) >> 8;
4451 b4
= remainder
& 0xff;
4455 /* The 8-bit immediate already found clears b1 (and maybe b2),
4456 but must leave b3 and b4 alone. */
4458 /* First try to find a 32-bit replicated constant that clears
4459 almost everything. We can assume that we can't do it in one,
4460 or else we wouldn't be here. */
4461 unsigned int tmp
= b1
& b2
& b3
& b4
;
4462 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4464 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4465 + (tmp
== b3
) + (tmp
== b4
);
4467 && (matching_bytes
>= 3
4468 || (matching_bytes
== 2
4469 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4471 /* At least 3 of the bytes match, and the fourth has at
4472 least as many bits set, or two of the bytes match
4473 and it will only require one more insn to finish. */
4481 /* Second, try to find a 16-bit replicated constant that can
4482 leave three of the bytes clear. If b2 or b4 is already
4483 zero, then we can. If the 8-bit from above would not
4484 clear b2 anyway, then we still win. */
4485 else if (b1
== b3
&& (!b2
|| !b4
4486 || (remainder
& 0x00ff0000 & ~result
)))
4488 result
= remainder
& 0xff00ff00;
4494 /* The 8-bit immediate already found clears b2 (and maybe b3)
4495 and we don't get here unless b1 is alredy clear, but it will
4496 leave b4 unchanged. */
4498 /* If we can clear b2 and b4 at once, then we win, since the
4499 8-bits couldn't possibly reach that far. */
4502 result
= remainder
& 0x00ff00ff;
4508 return_sequence
->i
[insns
++] = result
;
4509 remainder
&= ~result
;
4511 if (code
== SET
|| code
== MINUS
)
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
4531 /* As above, but extra parameter GENERATE which, if clear, suppresses
4535 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4536 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4537 int subtargets
, int generate
)
4541 int final_invert
= 0;
4543 int set_sign_bit_copies
= 0;
4544 int clear_sign_bit_copies
= 0;
4545 int clear_zero_bit_copies
= 0;
4546 int set_zero_bit_copies
= 0;
4547 int insns
= 0, neg_insns
, inv_insns
;
4548 unsigned HOST_WIDE_INT temp1
, temp2
;
4549 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4550 struct four_ints
*immediates
;
4551 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4553 /* Find out which operations are safe for a given CODE. Also do a quick
4554 check for degenerate cases; these can occur when DImode operations
4567 if (remainder
== 0xffffffff)
4570 emit_constant_insn (cond
,
4571 gen_rtx_SET (target
,
4572 GEN_INT (ARM_SIGN_EXTEND (val
))));
4578 if (reload_completed
&& rtx_equal_p (target
, source
))
4582 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4591 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4594 if (remainder
== 0xffffffff)
4596 if (reload_completed
&& rtx_equal_p (target
, source
))
4599 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4608 if (reload_completed
&& rtx_equal_p (target
, source
))
4611 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4615 if (remainder
== 0xffffffff)
4618 emit_constant_insn (cond
,
4619 gen_rtx_SET (target
,
4620 gen_rtx_NOT (mode
, source
)));
4627 /* We treat MINUS as (val - source), since (source - val) is always
4628 passed as (source + (-val)). */
4632 emit_constant_insn (cond
,
4633 gen_rtx_SET (target
,
4634 gen_rtx_NEG (mode
, source
)));
4637 if (const_ok_for_arm (val
))
4640 emit_constant_insn (cond
,
4641 gen_rtx_SET (target
,
4642 gen_rtx_MINUS (mode
, GEN_INT (val
),
4653 /* If we can do it in one insn get out quickly. */
4654 if (const_ok_for_op (val
, code
))
4657 emit_constant_insn (cond
,
4658 gen_rtx_SET (target
,
4660 ? gen_rtx_fmt_ee (code
, mode
, source
,
4666 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4668 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4669 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4673 if (mode
== SImode
&& i
== 16)
4674 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4676 emit_constant_insn (cond
,
4677 gen_zero_extendhisi2
4678 (target
, gen_lowpart (HImode
, source
)));
4680 /* Extz only supports SImode, but we can coerce the operands
4682 emit_constant_insn (cond
,
4683 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4684 gen_lowpart (SImode
, source
),
4685 GEN_INT (i
), const0_rtx
));
4691 /* Calculate a few attributes that may be useful for specific
4693 /* Count number of leading zeros. */
4694 for (i
= 31; i
>= 0; i
--)
4696 if ((remainder
& (1 << i
)) == 0)
4697 clear_sign_bit_copies
++;
4702 /* Count number of leading 1's. */
4703 for (i
= 31; i
>= 0; i
--)
4705 if ((remainder
& (1 << i
)) != 0)
4706 set_sign_bit_copies
++;
4711 /* Count number of trailing zero's. */
4712 for (i
= 0; i
<= 31; i
++)
4714 if ((remainder
& (1 << i
)) == 0)
4715 clear_zero_bit_copies
++;
4720 /* Count number of trailing 1's. */
4721 for (i
= 0; i
<= 31; i
++)
4723 if ((remainder
& (1 << i
)) != 0)
4724 set_zero_bit_copies
++;
4732 /* See if we can do this by sign_extending a constant that is known
4733 to be negative. This is a good, way of doing it, since the shift
4734 may well merge into a subsequent insn. */
4735 if (set_sign_bit_copies
> 1)
4737 if (const_ok_for_arm
4738 (temp1
= ARM_SIGN_EXTEND (remainder
4739 << (set_sign_bit_copies
- 1))))
4743 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4744 emit_constant_insn (cond
,
4745 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4746 emit_constant_insn (cond
,
4747 gen_ashrsi3 (target
, new_src
,
4748 GEN_INT (set_sign_bit_copies
- 1)));
4752 /* For an inverted constant, we will need to set the low bits,
4753 these will be shifted out of harm's way. */
4754 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4755 if (const_ok_for_arm (~temp1
))
4759 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4760 emit_constant_insn (cond
,
4761 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4762 emit_constant_insn (cond
,
4763 gen_ashrsi3 (target
, new_src
,
4764 GEN_INT (set_sign_bit_copies
- 1)));
4770 /* See if we can calculate the value as the difference between two
4771 valid immediates. */
4772 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4774 int topshift
= clear_sign_bit_copies
& ~1;
4776 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4777 & (0xff000000 >> topshift
));
4779 /* If temp1 is zero, then that means the 9 most significant
4780 bits of remainder were 1 and we've caused it to overflow.
4781 When topshift is 0 we don't need to do anything since we
4782 can borrow from 'bit 32'. */
4783 if (temp1
== 0 && topshift
!= 0)
4784 temp1
= 0x80000000 >> (topshift
- 1);
4786 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4788 if (const_ok_for_arm (temp2
))
4792 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4793 emit_constant_insn (cond
,
4794 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4795 emit_constant_insn (cond
,
4796 gen_addsi3 (target
, new_src
,
4804 /* See if we can generate this by setting the bottom (or the top)
4805 16 bits, and then shifting these into the other half of the
4806 word. We only look for the simplest cases, to do more would cost
4807 too much. Be careful, however, not to generate this when the
4808 alternative would take fewer insns. */
4809 if (val
& 0xffff0000)
4811 temp1
= remainder
& 0xffff0000;
4812 temp2
= remainder
& 0x0000ffff;
4814 /* Overlaps outside this range are best done using other methods. */
4815 for (i
= 9; i
< 24; i
++)
4817 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4818 && !const_ok_for_arm (temp2
))
4820 rtx new_src
= (subtargets
4821 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4823 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4824 source
, subtargets
, generate
);
4832 gen_rtx_ASHIFT (mode
, source
,
4839 /* Don't duplicate cases already considered. */
4840 for (i
= 17; i
< 24; i
++)
4842 if (((temp1
| (temp1
>> i
)) == remainder
)
4843 && !const_ok_for_arm (temp1
))
4845 rtx new_src
= (subtargets
4846 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4848 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4849 source
, subtargets
, generate
);
4854 gen_rtx_SET (target
,
4857 gen_rtx_LSHIFTRT (mode
, source
,
4868 /* If we have IOR or XOR, and the constant can be loaded in a
4869 single instruction, and we can find a temporary to put it in,
4870 then this can be done in two instructions instead of 3-4. */
4872 /* TARGET can't be NULL if SUBTARGETS is 0 */
4873 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4875 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4879 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4881 emit_constant_insn (cond
,
4882 gen_rtx_SET (sub
, GEN_INT (val
)));
4883 emit_constant_insn (cond
,
4884 gen_rtx_SET (target
,
4885 gen_rtx_fmt_ee (code
, mode
,
4896 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4897 and the remainder 0s for e.g. 0xfff00000)
4898 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4900 This can be done in 2 instructions by using shifts with mov or mvn.
4905 mvn r0, r0, lsr #12 */
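/* The shift trick described above (and its mirrored form for trailing
   ones a little further down) relies on two simple bit identities,
   sketched below with unsigned 32-bit arithmetic: OR-ing in n leading
   ones is ~(~(y << n) >> n), and OR-ing in n trailing ones is
   ~((~y >> n) << n).  Guarded out of the build; demo_* names are
   invented, and n must satisfy 0 < n < 32.  */
#if 0
#include <assert.h>
#include <stdint.h>

/* y | (n leading ones), computed with two shifts and complements, as in
   "mvn rd, ry, asl #n ; mvn rd, rd, lsr #n".  */
static uint32_t
demo_or_leading_ones (uint32_t y, unsigned n)
{
  return ~(~(y << n) >> n);
}

/* y | (n trailing ones), the mirrored trick used for constants
   like 0xfff.  */
static uint32_t
demo_or_trailing_ones (uint32_t y, unsigned n)
{
  return ~((~y >> n) << n);
}

static void
demo_check (void)
{
  uint32_t y = 0x12345678;
  assert (demo_or_leading_ones (y, 12) == (y | 0xfff00000));
  assert (demo_or_trailing_ones (y, 12) == (y | 0x00000fff));
}
#endif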
4906 if (set_sign_bit_copies
> 8
4907 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4911 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4912 rtx shift
= GEN_INT (set_sign_bit_copies
);
4918 gen_rtx_ASHIFT (mode
,
4923 gen_rtx_SET (target
,
4925 gen_rtx_LSHIFTRT (mode
, sub
,
4932 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4934 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4936 For eg. r0 = r0 | 0xfff
4941 if (set_zero_bit_copies
> 8
4942 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4946 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4947 rtx shift
= GEN_INT (set_zero_bit_copies
);
4953 gen_rtx_LSHIFTRT (mode
,
4958 gen_rtx_SET (target
,
4960 gen_rtx_ASHIFT (mode
, sub
,
4966 /* This will never be reached for Thumb2 because orn is a valid
4967 instruction. This is for Thumb1 and the ARM 32 bit cases.
4969 x = y | constant (such that ~constant is a valid constant)
4971 x = ~(~y & ~constant).
4973 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4977 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4978 emit_constant_insn (cond
,
4980 gen_rtx_NOT (mode
, source
)));
4983 sub
= gen_reg_rtx (mode
);
4984 emit_constant_insn (cond
,
4986 gen_rtx_AND (mode
, source
,
4988 emit_constant_insn (cond
,
4989 gen_rtx_SET (target
,
4990 gen_rtx_NOT (mode
, sub
)));
4997 /* See if two shifts will do 2 or more insn's worth of work. */
4998 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
5000 HOST_WIDE_INT shift_mask
= ((0xffffffff
5001 << (32 - clear_sign_bit_copies
))
5004 if ((remainder
| shift_mask
) != 0xffffffff)
5006 HOST_WIDE_INT new_val
5007 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5011 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5012 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
5013 new_src
, source
, subtargets
, 1);
5018 rtx targ
= subtargets
? NULL_RTX
: target
;
5019 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5020 targ
, source
, subtargets
, 0);
5026 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5027 rtx shift
= GEN_INT (clear_sign_bit_copies
);
5029 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
5030 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5036 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5038 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5040 if ((remainder
| shift_mask
) != 0xffffffff)
5042 HOST_WIDE_INT new_val
5043 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5046 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5048 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5049 new_src
, source
, subtargets
, 1);
5054 rtx targ
= subtargets
? NULL_RTX
: target
;
5056 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5057 targ
, source
, subtargets
, 0);
5063 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5064 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5066 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5067 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5079 /* Calculate what the instruction sequences would be if we generated it
5080 normally, negated, or inverted. */
5082 /* AND cannot be split into multiple insns, so invert and use BIC. */
5085 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5088 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5093 if (can_invert
|| final_invert
)
5094 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5099 immediates
= &pos_immediates
;
5101 /* Is the negated immediate sequence more efficient? */
5102 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5105 immediates
= &neg_immediates
;
5110 /* Is the inverted immediate sequence more efficient?
5111 We must allow for an extra NOT instruction for XOR operations, although
5112 there is some chance that the final 'mvn' will get optimized later. */
5113 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5116 immediates
= &inv_immediates
;
5124 /* Now output the chosen sequence as instructions. */
5127 for (i
= 0; i
< insns
; i
++)
5129 rtx new_src
, temp1_rtx
;
5131 temp1
= immediates
->i
[i
];
5133 if (code
== SET
|| code
== MINUS
)
5134 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5135 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5136 new_src
= gen_reg_rtx (mode
);
5142 else if (can_negate
)
5145 temp1
= trunc_int_for_mode (temp1
, mode
);
5146 temp1_rtx
= GEN_INT (temp1
);
5150 else if (code
== MINUS
)
5151 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5153 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5155 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5160 can_negate
= can_invert
;
5164 else if (code
== MINUS
)
5172 emit_constant_insn (cond
, gen_rtx_SET (target
,
5173 gen_rtx_NOT (mode
, source
)));
5180 /* Canonicalize a comparison so that we are more likely to recognize it.
5181 This can be done for a few constant compares, where we can make the
5182 immediate value easier to load. */
5185 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5186 bool op0_preserve_value
)
5189 unsigned HOST_WIDE_INT i
, maxval
;
5191 mode
= GET_MODE (*op0
);
5192 if (mode
== VOIDmode
)
5193 mode
= GET_MODE (*op1
);
5195 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5197 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5198 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5199 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5200 for GTU/LEU in Thumb mode. */
5204 if (*code
== GT
|| *code
== LE
5205 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5207 /* Missing comparison. First try to use an available
5209 if (CONST_INT_P (*op1
))
5217 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5219 *op1
= GEN_INT (i
+ 1);
5220 *code
= *code
== GT
? GE
: LT
;
5226 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5227 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5229 *op1
= GEN_INT (i
+ 1);
5230 *code
= *code
== GTU
? GEU
: LTU
;
5239 /* If that did not work, reverse the condition. */
5240 if (!op0_preserve_value
)
5242 std::swap (*op0
, *op1
);
5243 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5249 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5250 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5251 to facilitate possible combining with a cmp into 'ands'. */
5253 && GET_CODE (*op0
) == ZERO_EXTEND
5254 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5255 && GET_MODE (XEXP (*op0
, 0)) == QImode
5256 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5257 && subreg_lowpart_p (XEXP (*op0
, 0))
5258 && *op1
== const0_rtx
)
5259 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5262 /* Comparisons smaller than DImode. Only adjust comparisons against
5263 an out-of-range constant. */
5264 if (!CONST_INT_P (*op1
)
5265 || const_ok_for_arm (INTVAL (*op1
))
5266 || const_ok_for_arm (- INTVAL (*op1
)))
5280 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5282 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5283 *code
= *code
== GT
? GE
: LT
;
5291 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5293 *op1
= GEN_INT (i
- 1);
5294 *code
= *code
== GE
? GT
: LE
;
5301 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5302 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5304 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5305 *code
= *code
== GTU
? GEU
: LTU
;
5313 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5315 *op1
= GEN_INT (i
- 1);
5316 *code
= *code
== GEU
? GTU
: LEU
;
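/* A small guarded example of the adjustment arm_canonicalize_comparison
   performs on out-of-range constants: since 0xfff is not a valid ARM
   immediate but 0x1000 is, "x > 0xfff" is rewritten as "x >= 0x1000"
   (with the mirrored tweak for GE/LE/GEU/LEU), so the comparison constant
   becomes loadable in one instruction.  The helper below, with an
   invented demo_ name, only demonstrates the equivalence for signed
   32-bit values.  */
#if 0
#include <assert.h>
#include <stdint.h>

/* x > c  and  x >= c + 1  are equivalent for int32_t as long as c + 1
   does not overflow; the backend uses this to trade an unencodable
   comparison constant (0xfff) for an encodable one (0x1000).  */
static void
demo_gt_to_ge (int32_t x)
{
  assert ((x > 0xfff) == (x >= 0x1000));
}
#endif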
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return arm_libcall_value_1 (mode);
}
/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}
5391 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5393 static bool init_done
= false;
5394 static libcall_table_type
*libcall_htab
= NULL
;
5400 libcall_htab
= new libcall_table_type (31);
5401 add_libcall (libcall_htab
,
5402 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5403 add_libcall (libcall_htab
,
5404 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5405 add_libcall (libcall_htab
,
5406 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5407 add_libcall (libcall_htab
,
5408 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5410 add_libcall (libcall_htab
,
5411 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5412 add_libcall (libcall_htab
,
5413 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5414 add_libcall (libcall_htab
,
5415 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5416 add_libcall (libcall_htab
,
5417 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5419 add_libcall (libcall_htab
,
5420 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5421 add_libcall (libcall_htab
,
5422 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5423 add_libcall (libcall_htab
,
5424 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5425 add_libcall (libcall_htab
,
5426 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5427 add_libcall (libcall_htab
,
5428 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5429 add_libcall (libcall_htab
,
5430 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5431 add_libcall (libcall_htab
,
5432 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5433 add_libcall (libcall_htab
,
5434 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5436 /* Values from double-precision helper functions are returned in core
5437 registers if the selected core only supports single-precision
5438 arithmetic, even if we are using the hard-float ABI. The same is
5439 true for single-precision helpers, but we will never be using the
5440 hard-float ABI on a CPU which doesn't support single-precision
5441 operations in hardware. */
5442 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5443 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5444 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5445 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5446 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5447 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5448 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5449 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5450 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5451 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5452 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5453 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5455 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5457 add_libcall (libcall_htab
,
5458 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5461 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return arm_libcall_value_1 (mode);
}

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}
5512 /* Determine the amount of memory needed to store the possible return
5513 registers of an untyped call. */
5515 arm_apply_result_size (void)
5521 if (TARGET_HARD_FLOAT_ABI
)
5523 if (TARGET_IWMMXT_ABI
)
5530 /* Decide whether TYPE should be returned in memory (true)
5531 or in a register (false). FNTYPE is the type of the function making
5534 arm_return_in_memory (const_tree type
, const_tree fntype
)
5538 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5540 if (TARGET_AAPCS_BASED
)
5542 /* Simple, non-aggregate types (ie not including vectors and
5543 complex) are always returned in a register (or registers).
5544 We don't care about which register here, so we can short-cut
5545 some of the detail. */
5546 if (!AGGREGATE_TYPE_P (type
)
5547 && TREE_CODE (type
) != VECTOR_TYPE
5548 && TREE_CODE (type
) != COMPLEX_TYPE
)
5551 /* Any return value that is no larger than one word can be
5553 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5556 /* Check any available co-processors to see if they accept the
5557 type as a register candidate (VFP, for example, can return
5558 some aggregates in consecutive registers). These aren't
5559 available if the call is variadic. */
5560 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5563 /* Vector values should be returned using ARM registers, not
5564 memory (unless they're over 16 bytes, which will break since
5565 we only have four call-clobbered registers to play with). */
5566 if (TREE_CODE (type
) == VECTOR_TYPE
)
5567 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5569 /* The rest go in memory. */
5573 if (TREE_CODE (type
) == VECTOR_TYPE
)
5574 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5576 if (!AGGREGATE_TYPE_P (type
) &&
5577 (TREE_CODE (type
) != VECTOR_TYPE
))
5578 /* All simple types are returned in registers. */
5581 if (arm_abi
!= ARM_ABI_APCS
)
5583 /* ATPCS and later return aggregate types in memory only if they are
5584 larger than a word (or are variable size). */
5585 return (size
< 0 || size
> UNITS_PER_WORD
);
5588 /* For the arm-wince targets we choose to be compatible with Microsoft's
5589 ARM and Thumb compilers, which always return aggregates in memory. */
5591 /* All structures/unions bigger than one word are returned in memory.
5592 Also catch the case where int_size_in_bytes returns -1. In this case
5593 the aggregate is either huge or of variable size, and in either case
5594 we will want to return it via memory and not in a register. */
5595 if (size
< 0 || size
> UNITS_PER_WORD
)
5598 if (TREE_CODE (type
) == RECORD_TYPE
)
5602 /* For a struct the APCS says that we only return in a register
5603 if the type is 'integer like' and every addressable element
5604 has an offset of zero. For practical purposes this means
5605 that the structure can have at most one non bit-field element
5606 and that this element must be the first one in the structure. */
5608 /* Find the first field, ignoring non FIELD_DECL things which will
5609 have been created by C++. */
5610 for (field
= TYPE_FIELDS (type
);
5611 field
&& TREE_CODE (field
) != FIELD_DECL
;
5612 field
= DECL_CHAIN (field
))
5616 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5618 /* Check that the first field is valid for returning in a register. */
5620 /* ... Floats are not allowed */
5621 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5624 /* ... Aggregates that are not themselves valid for returning in
5625 a register are not allowed. */
5626 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5629 /* Now check the remaining fields, if any. Only bitfields are allowed,
5630 since they are not addressable. */
5631 for (field
= DECL_CHAIN (field
);
5633 field
= DECL_CHAIN (field
))
5635 if (TREE_CODE (field
) != FIELD_DECL
)
5638 if (!DECL_BIT_FIELD_TYPE (field
))
5645 if (TREE_CODE (type
) == UNION_TYPE
)
5649 /* Unions can be returned in registers if every element is
5650 integral, or can be returned in an integer register. */
5651 for (field
= TYPE_FIELDS (type
);
5653 field
= DECL_CHAIN (field
))
5655 if (TREE_CODE (field
) != FIELD_DECL
)
5658 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5661 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
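/* Illustrative only (guarded out of the build): under the APCS rules
   spelled out in arm_return_in_memory above, a struct is returned in a
   register only if it fits in one word and is "integer like", that is,
   at most one non-bitfield member, no floats, and nothing addressable at
   a non-zero offset.  The types below are invented examples, not types
   the compiler uses.  */
#if 0
/* Returned in r0 under APCS: one word, single integer member.  */
struct demo_in_reg { int handle; };

/* Returned in memory under APCS: a float member is never
   "integer like".  */
struct demo_in_mem_float { float f; };

/* Returned in memory: larger than one word.  */
struct demo_in_mem_big { int a, b; };
#endif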
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args [] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS variant.  */
  return ARM_PCS_UNKNOWN;
}
5713 /* Get the PCS variant to use for this call. TYPE is the function's type
5714 specification, DECL is the specific declartion. DECL may be null if
5715 the call could be indirect or if this is a library call. */
5717 arm_get_pcs_model (const_tree type
, const_tree decl
)
5719 bool user_convention
= false;
5720 enum arm_pcs user_pcs
= arm_pcs_default
;
5725 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5728 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5729 user_convention
= true;
5732 if (TARGET_AAPCS_BASED
)
5734 /* Detect varargs functions. These always use the base rules
5735 (no argument is ever a candidate for a co-processor
5737 bool base_rules
= stdarg_p (type
);
5739 if (user_convention
)
5741 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5742 sorry ("non-AAPCS derived PCS variant");
5743 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5744 error ("variadic functions must use the base AAPCS variant");
5748 return ARM_PCS_AAPCS
;
5749 else if (user_convention
)
5751 else if (decl
&& flag_unit_at_a_time
)
5753 /* Local functions never leak outside this compilation unit,
5754 so we are free to use whatever conventions are
5756 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5757 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5759 return ARM_PCS_AAPCS_LOCAL
;
5762 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5763 sorry ("PCS variant");
5765 /* For everything else we use the target's default. */
5766 return arm_pcs_default
;
5771 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5772 const_tree fntype ATTRIBUTE_UNUSED
,
5773 rtx libcall ATTRIBUTE_UNUSED
,
5774 const_tree fndecl ATTRIBUTE_UNUSED
)
5776 /* Record the unallocated VFP registers. */
5777 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5778 pcum
->aapcs_vfp_reg_alloc
= 0;
5781 /* Walk down the type tree of TYPE counting consecutive base elements.
5782 If *MODEP is VOIDmode, then set it to the first valid floating point
5783 type. If a non-floating point type is found, or if a floating point
5784 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5785 otherwise return the count in the sub-tree. */
5787 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5792 switch (TREE_CODE (type
))
5795 mode
= TYPE_MODE (type
);
5796 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5799 if (*modep
== VOIDmode
)
5808 mode
= TYPE_MODE (TREE_TYPE (type
));
5809 if (mode
!= DFmode
&& mode
!= SFmode
)
5812 if (*modep
== VOIDmode
)
5821 /* Use V2SImode and V4SImode as representatives of all 64-bit
5822 and 128-bit vector types, whether or not those modes are
5823 supported with the present options. */
5824 size
= int_size_in_bytes (type
);
5837 if (*modep
== VOIDmode
)
5840 /* Vector modes are considered to be opaque: two vectors are
5841 equivalent for the purposes of being homogeneous aggregates
5842 if they are the same size. */
5851 tree index
= TYPE_DOMAIN (type
);
5853 /* Can't handle incomplete types nor sizes that are not
5855 if (!COMPLETE_TYPE_P (type
)
5856 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5859 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5862 || !TYPE_MAX_VALUE (index
)
5863 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5864 || !TYPE_MIN_VALUE (index
)
5865 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5869 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5870 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5872 /* There must be no padding. */
5873 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5885 /* Can't handle incomplete types nor sizes that are not
5887 if (!COMPLETE_TYPE_P (type
)
5888 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5891 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5893 if (TREE_CODE (field
) != FIELD_DECL
)
5896 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5902 /* There must be no padding. */
5903 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5910 case QUAL_UNION_TYPE
:
5912 /* These aren't very interesting except in a degenerate case. */
5917 /* Can't handle incomplete types nor sizes that are not
5919 if (!COMPLETE_TYPE_P (type
)
5920 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5923 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5925 if (TREE_CODE (field
) != FIELD_DECL
)
5928 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5931 count
= count
> sub_count
? count
: sub_count
;
5934 /* There must be no padding. */
5935 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
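/* Illustrative sketch (editor addition, not compiled): the walk above counts
   identical floating-point or vector base elements; its callers then accept
   aggregates of one to four such elements as VFP candidates.  The struct
   names are invented for the example.  */
#if 0
struct hfa2 { double x, y; };            /* 2 x DFmode  -> candidate        */
struct hfa4 { float a, b, c, d; };       /* 4 x SFmode  -> candidate        */
struct mixed { float a; double b; };     /* base modes differ -> rejected   */
struct toobig { float a[5]; };           /* five elements -> rejected       */
#endif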
/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}
5973 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5974 suitable for passing or returning in VFP registers for the PCS
5975 variant selected. If it is, then *BASE_MODE is updated to contain
5976 a machine mode describing each element of the argument's type and
5977 *COUNT to hold the number of such elements. */
5979 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5980 machine_mode mode
, const_tree type
,
5981 machine_mode
*base_mode
, int *count
)
5983 machine_mode new_mode
= VOIDmode
;
5985 /* If we have the type information, prefer that to working things
5986 out from the mode. */
5989 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5991 if (ag_count
> 0 && ag_count
<= 4)
5996 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5997 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5998 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6003 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6006 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
6012 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
6015 *base_mode
= new_mode
;
6020 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
6021 machine_mode mode
, const_tree type
)
6023 int count ATTRIBUTE_UNUSED
;
6024 machine_mode ag_mode ATTRIBUTE_UNUSED
;
6026 if (!use_vfp_abi (pcs_variant
, false))
6028 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6033 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6036 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6039 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6040 &pcum
->aapcs_vfp_rmode
,
6041 &pcum
->aapcs_vfp_rcount
);
6044 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6045 for the behaviour of this function. */
6048 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6049 const_tree type ATTRIBUTE_UNUSED
)
6052 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6053 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6054 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6057 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6058 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6060 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6062 || (mode
== TImode
&& ! TARGET_NEON
)
6063 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6066 int rcount
= pcum
->aapcs_vfp_rcount
;
6068 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6072 /* Avoid using unsupported vector modes. */
6073 if (rmode
== V2SImode
)
6075 else if (rmode
== V4SImode
)
6082 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6083 for (i
= 0; i
< rcount
; i
++)
6085 rtx tmp
= gen_rtx_REG (rmode
,
6086 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6087 tmp
= gen_rtx_EXPR_LIST
6089 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6090 XVECEXP (par
, 0, i
) = tmp
;
6093 pcum
->aapcs_reg
= par
;
6096 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6102 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6103 comment there for the behaviour of this function. */
6106 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6108 const_tree type ATTRIBUTE_UNUSED
)
6110 if (!use_vfp_abi (pcs_variant
, false))
6114 || (GET_MODE_CLASS (mode
) == MODE_INT
6115 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6119 machine_mode ag_mode
;
6124 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6129 if (ag_mode
== V2SImode
)
6131 else if (ag_mode
== V4SImode
)
6137 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6138 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6139 for (i
= 0; i
< count
; i
++)
6141 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6142 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6143 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6144 XVECEXP (par
, 0, i
) = tmp
;
6150 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
		   machine_mode mode  ATTRIBUTE_UNUSED,
		   const_tree type  ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
}
6163 #define AAPCS_CP(X) \
6165 aapcs_ ## X ## _cum_init, \
6166 aapcs_ ## X ## _is_call_candidate, \
6167 aapcs_ ## X ## _allocate, \
6168 aapcs_ ## X ## _is_return_candidate, \
6169 aapcs_ ## X ## _allocate_return_reg, \
6170 aapcs_ ## X ## _advance \
6173 /* Table of co-processors that can be used to pass arguments in
6174 registers.  Ideally no argument should be a candidate for more than
6175 one co-processor table entry, but the table is processed in order
6176 and stops after the first match.  If that entry then fails to put
6177 the argument into a co-processor register, the argument will go on the stack.  */
6181 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6182 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6184 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6185 BLKmode) is a candidate for this co-processor's registers; this
6186 function should ignore any position-dependent state in
6187 CUMULATIVE_ARGS and only use call-type dependent information. */
6188 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6190 /* Return true if the argument does get a co-processor register; it
6191 should set aapcs_reg to an RTX of the register allocated as is
6192 required for a return from FUNCTION_ARG. */
6193 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6195 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6196 be returned in this co-processor's registers. */
6197 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6199 /* Allocate and return an RTX element to hold the return type of a call. This
6200 routine must not fail and will only be called if is_return_candidate
6201 returned true with the same parameters. */
6202 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6204 /* Finish processing this argument and prepare to start processing
6206 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6207 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6215 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6220 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6221 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6228 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6230 /* We aren't passed a decl, so we can't check that a call is local.
6231 However, it isn't clear that that would be a win anyway, since it
6232 might limit some tail-calling opportunities. */
6233 enum arm_pcs pcs_variant
;
6237 const_tree fndecl
= NULL_TREE
;
6239 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6242 fntype
= TREE_TYPE (fntype
);
6245 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6248 pcs_variant
= arm_pcs_default
;
6250 if (pcs_variant
!= ARM_PCS_AAPCS
)
6254 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6255 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6264 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6267 /* We aren't passed a decl, so we can't check that a call is local.
6268 However, it isn't clear that that would be a win anyway, since it
6269 might limit some tail-calling opportunities. */
6270 enum arm_pcs pcs_variant
;
6271 int unsignedp ATTRIBUTE_UNUSED
;
6275 const_tree fndecl
= NULL_TREE
;
6277 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6280 fntype
= TREE_TYPE (fntype
);
6283 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6286 pcs_variant
= arm_pcs_default
;
6288 /* Promote integer types. */
6289 if (type
&& INTEGRAL_TYPE_P (type
))
6290 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6292 if (pcs_variant
!= ARM_PCS_AAPCS
)
6296 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6297 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6299 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6303 /* Promotes small structs returned in a register to full-word size
6304 for big-endian AAPCS. */
6305 if (type
&& arm_return_in_msb (type
))
6307 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6308 if (size
% UNITS_PER_WORD
!= 0)
6310 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6311 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
6315 return gen_rtx_REG (mode
, R0_REGNUM
);
6319 aapcs_libcall_value (machine_mode mode
)
6321 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6322 && GET_MODE_SIZE (mode
) <= 4)
6325 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6328 /* Lay out a function argument using the AAPCS rules. The rule
6329 numbers referred to here are those in the AAPCS. */
6331 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6332 const_tree type
, bool named
)
6337 /* We only need to do this once per argument. */
6338 if (pcum
->aapcs_arg_processed
)
6341 pcum
->aapcs_arg_processed
= true;
6343 /* Special case: if named is false then we are handling an incoming
6344 anonymous argument which is on the stack. */
6348 /* Is this a potential co-processor register candidate? */
6349 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6351 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6352 pcum
->aapcs_cprc_slot
= slot
;
6354 /* We don't have to apply any of the rules from part B of the
6355 preparation phase, these are handled elsewhere in the
6360 /* A Co-processor register candidate goes either in its own
6361 class of registers or on the stack. */
6362 if (!pcum
->aapcs_cprc_failed
[slot
])
6364 /* C1.cp - Try to allocate the argument to co-processor
6366 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6369 /* C2.cp - Put the argument on the stack and note that we
6370 can't assign any more candidates in this slot. We also
6371 need to note that we have allocated stack space, so that
6372 we won't later try to split a non-cprc candidate between
6373 core registers and the stack. */
6374 pcum
->aapcs_cprc_failed
[slot
] = true;
6375 pcum
->can_split
= false;
6378 /* We didn't get a register, so this argument goes on the
6380 gcc_assert (pcum
->can_split
== false);
6385 /* C3 - For double-word aligned arguments, round the NCRN up to the
6386 next even number. */
6387 ncrn
= pcum
->aapcs_ncrn
;
6390 int res
= arm_needs_doubleword_align (mode
, type
);
6391 /* Only warn during RTL expansion of call stmts, otherwise we would
6392 warn e.g. during gimplification even on functions that will be
6393 always inlined, and we'd warn multiple times. Don't warn when
6394 called in expand_function_start either, as we warn instead in
6395 arm_function_arg_boundary in that case. */
6396 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
6397 inform (input_location
, "parameter passing for argument of type "
6398 "%qT changed in GCC 7.1", type
);
6403 nregs
= ARM_NUM_REGS2(mode
, type
);
6405 /* Sigh, this test should really assert that nregs > 0, but a GCC
6406 extension allows empty structs and then gives them empty size; it
6407 then allows such a structure to be passed by value. For some of
6408 the code below we have to pretend that such an argument has
6409 non-zero size so that we 'locate' it correctly either in
6410 registers or on the stack. */
6411 gcc_assert (nregs
>= 0);
6413 nregs2
= nregs
? nregs
: 1;
6415 /* C4 - Argument fits entirely in core registers. */
6416 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6418 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6419 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6423 /* C5 - Some core registers left and there are no arguments already
6424 on the stack: split this argument between the remaining core
6425 registers and the stack. */
6426 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6428 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6429 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6430 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6434 /* C6 - NCRN is set to 4. */
6435 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6437 /* C7,C8 - argument goes on the stack.  We have nothing to do here. */
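/* Worked sketch (editor addition, not compiled): for an AAPCS call such as
   the one below, the rules above give
     a -> r0        (C4: fits entirely in core registers)
     b -> r2 + r3   (C3: the NCRN is rounded up to the next even number)
     c -> stack     (C6: the NCRN is set to 4; r1 stays unused because the
                     NCRN never moves backwards).
   The exact assignment depends on the target options; this is only an
   illustration of the code above.  */
#if 0
void f (int a, long long b, int c);
#endif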
6441 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6442 for a call to a function whose data type is FNTYPE.
6443 For a library call, FNTYPE is NULL. */
6445 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6447 tree fndecl ATTRIBUTE_UNUSED
)
6449 /* Long call handling. */
6451 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6453 pcum
->pcs_variant
= arm_pcs_default
;
6455 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6457 if (arm_libcall_uses_aapcs_base (libname
))
6458 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6460 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6461 pcum
->aapcs_reg
= NULL_RTX
;
6462 pcum
->aapcs_partial
= 0;
6463 pcum
->aapcs_arg_processed
= false;
6464 pcum
->aapcs_cprc_slot
= -1;
6465 pcum
->can_split
= true;
6467 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6471 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6473 pcum
->aapcs_cprc_failed
[i
] = false;
6474 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6482 /* On the ARM, the offset starts at 0. */
6484 pcum
->iwmmxt_nregs
= 0;
6485 pcum
->can_split
= true;
6487 /* Varargs vectors are treated the same as long long.
6488 named_count avoids having to change the way arm handles 'named' */
6489 pcum
->named_count
= 0;
6492 if (TARGET_REALLY_IWMMXT
&& fntype
)
6496 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6498 fn_arg
= TREE_CHAIN (fn_arg
))
6499 pcum
->named_count
+= 1;
6501 if (! pcum
->named_count
)
6502 pcum
->named_count
= INT_MAX
;
6506 /* Return 1 if double word alignment is required for argument passing.
6507 Return -1 if double word alignment used to be required for argument
6508 passing before PR77728 ABI fix, but is not required anymore.
6509 Return 0 if double word alignment is not required and wasn't required
6512 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6515 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
6517 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6518 if (!AGGREGATE_TYPE_P (type
))
6519 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6521 /* Array types: Use member alignment of element type. */
6522 if (TREE_CODE (type
) == ARRAY_TYPE
)
6523 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6526 /* Record/aggregate types: Use greatest member alignment of any member. */
6527 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6528 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6530 if (TREE_CODE (field
) == FIELD_DECL
)
6533 /* Before PR77728 fix, we were incorrectly considering also
6534 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6535 Make sure we can warn about that with -Wpsabi. */
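/* Illustrative sketch (editor addition, not compiled): arguments whose
   (member) alignment exceeds PARM_BOUNDARY are the ones reported above,
   which makes the layout code start them on an even-numbered register.
   The type names are invented for the example.  */
#if 0
long long ll_arg;              /* 64-bit natural alignment -> doubleword   */
struct dw { long long v; };    /* FIELD_DECL alignment > PARM_BOUNDARY     */
struct w  { int a, b; };       /* only 32-bit members -> no doubleword     */
#endif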
6543 /* Determine where to put an argument to a function.
6544 Value is zero to push the argument on the stack,
6545 or a hard register in which to store the argument.
6547 MODE is the argument's machine mode.
6548 TYPE is the data type of the argument (as a tree).
6549 This is null for libcalls where that information may
6551 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6552 the preceding args and about the function being called.
6553 NAMED is nonzero if this argument is a named parameter
6554 (otherwise it is an extra parameter matching an ellipsis).
6556 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6557 other arguments are passed on the stack. If (NAMED == 0) (which happens
6558 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6559 defined), say it is passed in the stack (function_prologue will
6560 indeed make it pass in the stack if necessary). */
6563 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6564 const_tree type
, bool named
)
6566 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6569 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6570 a call insn (op3 of a call_value insn). */
6571 if (mode
== VOIDmode
)
6574 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6576 aapcs_layout_arg (pcum
, mode
, type
, named
);
6577 return pcum
->aapcs_reg
;
6580 /* Varargs vectors are treated the same as long long.
6581 named_count avoids having to change the way arm handles 'named' */
6582 if (TARGET_IWMMXT_ABI
6583 && arm_vector_mode_supported_p (mode
)
6584 && pcum
->named_count
> pcum
->nargs
+ 1)
6586 if (pcum
->iwmmxt_nregs
<= 9)
6587 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6590 pcum
->can_split
= false;
6595 /* Put doubleword aligned quantities in even register pairs. */
6596 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
6598 int res
= arm_needs_doubleword_align (mode
, type
);
6599 if (res
< 0 && warn_psabi
)
6600 inform (input_location
, "parameter passing for argument of type "
6601 "%qT changed in GCC 7.1", type
);
6606 /* Only allow splitting an arg between regs and memory if all preceding
6607 args were allocated to regs. For args passed by reference we only count
6608 the reference pointer. */
6609 if (pcum
->can_split
)
6612 nregs
= ARM_NUM_REGS2 (mode
, type
);
6614 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6617 return gen_rtx_REG (mode
, pcum
->nregs
);
6621 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6623 if (!ARM_DOUBLEWORD_ALIGN
)
6624 return PARM_BOUNDARY
;
6626 int res
= arm_needs_doubleword_align (mode
, type
);
6627 if (res
< 0 && warn_psabi
)
6628 inform (input_location
, "parameter passing for argument of type %qT "
6629 "changed in GCC 7.1", type
);
6631 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
6635 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6636 tree type
, bool named
)
6638 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6639 int nregs
= pcum
->nregs
;
6641 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6643 aapcs_layout_arg (pcum
, mode
, type
, named
);
6644 return pcum
->aapcs_partial
;
6647 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6650 if (NUM_ARG_REGS
> nregs
6651 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6653 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6658 /* Update the data in PCUM to advance over an argument
6659 of mode MODE and data type TYPE.
6660 (TYPE is null for libcalls where that information may not be available.) */
6663 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6664 const_tree type
, bool named
)
6666 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6668 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6670 aapcs_layout_arg (pcum
, mode
, type
, named
);
6672 if (pcum
->aapcs_cprc_slot
>= 0)
6674 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6676 pcum
->aapcs_cprc_slot
= -1;
6679 /* Generic stuff. */
6680 pcum
->aapcs_arg_processed
= false;
6681 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6682 pcum
->aapcs_reg
= NULL_RTX
;
6683 pcum
->aapcs_partial
= 0;
6688 if (arm_vector_mode_supported_p (mode
)
6689 && pcum
->named_count
> pcum
->nargs
6690 && TARGET_IWMMXT_ABI
)
6691 pcum
->iwmmxt_nregs
+= 1;
6693 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
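/* Usage sketch (editor addition, not compiled): the pragma state recorded
   above is driven by the ARM-specific pragmas, which bracket declarations
   like so.  The function names are invented for the example.  */
#if 0
#pragma long_calls
extern void far_away (void);       /* treated as if it had long_call   */
#pragma no_long_calls
extern void nearby (void);         /* treated as if it had short_call  */
#pragma long_calls_off
extern void default_call (void);   /* back to the command-line default */
#endif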
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
6752 /* Handle an "interrupt" or "isr" attribute;
6753 arguments as in struct attribute_spec.handler. */
6755 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6760 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6762 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6764 *no_add_attrs
= true;
6766 /* FIXME: the argument if any is checked for type attributes;
6767 should it be checked for decl ones? */
6771 if (TREE_CODE (*node
) == FUNCTION_TYPE
6772 || TREE_CODE (*node
) == METHOD_TYPE
)
6774 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6776 warning (OPT_Wattributes
, "%qE attribute ignored",
6778 *no_add_attrs
= true;
6781 else if (TREE_CODE (*node
) == POINTER_TYPE
6782 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6783 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6784 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6786 *node
= build_variant_type_copy (*node
);
6787 TREE_TYPE (*node
) = build_type_attribute_variant
6789 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6790 *no_add_attrs
= true;
6794 /* Possibly pass this attribute on from the type to a decl. */
6795 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6796 | (int) ATTR_FLAG_FUNCTION_NEXT
6797 | (int) ATTR_FLAG_ARRAY_NEXT
))
6799 *no_add_attrs
= true;
6800 return tree_cons (name
, args
, NULL_TREE
);
6804 warning (OPT_Wattributes
, "%qE attribute ignored",
6813 /* Handle a "pcs" attribute; arguments as in struct
6814 attribute_spec.handler. */
6816 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6817 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6819 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6821 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6822 *no_add_attrs
= true;
6827 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6828 /* Handle the "notshared" attribute. This attribute is another way of
6829 requesting hidden visibility. ARM's compiler supports
6830 "__declspec(notshared)"; we support the same thing via an
6834 arm_handle_notshared_attribute (tree
*node
,
6835 tree name ATTRIBUTE_UNUSED
,
6836 tree args ATTRIBUTE_UNUSED
,
6837 int flags ATTRIBUTE_UNUSED
,
6840 tree decl
= TYPE_NAME (*node
);
6844 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6845 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6846 *no_add_attrs
= false;
6852 /* This function returns true if a function with declaration FNDECL and type
6853 FNTYPE uses the stack to pass arguments or return variables and false
6854 otherwise. This is used for functions with the attributes
6855 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6856 diagnostic messages if the stack is used. NAME is the name of the attribute
6860 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
6862 function_args_iterator args_iter
;
6863 CUMULATIVE_ARGS args_so_far_v
;
6864 cumulative_args_t args_so_far
;
6865 bool first_param
= true;
6866 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
6868 /* Error out if any argument is passed on the stack. */
6869 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
6870 args_so_far
= pack_cumulative_args (&args_so_far_v
);
6871 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
6874 machine_mode arg_mode
= TYPE_MODE (arg_type
);
6876 prev_arg_type
= arg_type
;
6877 if (VOID_TYPE_P (arg_type
))
6881 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
, true);
6882 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
, true);
6884 || arm_arg_partial_bytes (args_so_far
, arg_mode
, arg_type
, true))
6886 error ("%qE attribute not available to functions with arguments "
6887 "passed on the stack", name
);
6890 first_param
= false;
6893 /* Error out for variadic functions since we cannot control how many
6894 arguments will be passed and thus stack could be used. stdarg_p () is not
6895 used for the checking to avoid browsing arguments twice. */
6896 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
6898 error ("%qE attribute not available to functions with variable number "
6899 "of arguments", name
);
6903 /* Error out if return value is passed on the stack. */
6904 ret_type
= TREE_TYPE (fntype
);
6905 if (arm_return_in_memory (ret_type
, fntype
))
6907 error ("%qE attribute not available to functions that return value on "
6914 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6915 function will check whether the attribute is allowed here and will add the
6916 attribute to the function declaration tree or otherwise issue a warning. */
6919 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
6928 *no_add_attrs
= true;
6929 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6934 /* Ignore attribute for function types. */
6935 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6937 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6939 *no_add_attrs
= true;
6945 /* Warn for static linkage functions. */
6946 if (!TREE_PUBLIC (fndecl
))
6948 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
6949 "with static linkage", name
);
6950 *no_add_attrs
= true;
6954 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
6955 TREE_TYPE (fndecl
));
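/* Usage sketch (editor addition, not compiled; requires -mcmse): a secure
   entry function that the checks above accept passes its arguments and
   return value entirely in registers.  */
#if 0
int __attribute__ ((cmse_nonsecure_entry))
secure_get_counter (void)
{
  return 42;
}
#endif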
6960 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6961 function will check whether the attribute is allowed here and will add the
6962 attribute to the function type tree or otherwise issue a diagnostic. The
6963 reason we check this at declaration time is to only allow the use of the
6964 attribute with declarations of function pointers and not function
6965 declarations. This function checks NODE is of the expected type and issues
6966 diagnostics otherwise using NAME. If it is not of the expected type
6967 *NO_ADD_ATTRS will be set to true. */
6970 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
6975 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
6980 *no_add_attrs
= true;
6981 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6986 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
6989 fntype
= TREE_TYPE (decl
);
6992 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
6993 fntype
= TREE_TYPE (fntype
);
6995 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
6997 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
6998 "function pointer", name
);
6999 *no_add_attrs
= true;
7003 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
7008 /* Prevent trees being shared among function types with and without
7009 cmse_nonsecure_call attribute. */
7010 type
= TREE_TYPE (decl
);
7012 type
= build_distinct_type_copy (type
);
7013 TREE_TYPE (decl
) = type
;
7016 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
7019 fntype
= TREE_TYPE (fntype
);
7020 fntype
= build_distinct_type_copy (fntype
);
7021 TREE_TYPE (type
) = fntype
;
7024 /* Construct a type attribute and add it to the function type. */
7025 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
7026 TYPE_ATTRIBUTES (fntype
));
7027 TYPE_ATTRIBUTES (fntype
) = attrs
;
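/* Usage sketch (editor addition, not compiled; requires -mcmse): as the
   checks above require, the attribute is written on the base type of a
   function pointer rather than on a function declaration.  */
#if 0
typedef void __attribute__ ((cmse_nonsecure_call)) nonsecure_fn (int);
nonsecure_fn *ns_callback;   /* calls use the non-secure call sequence */
#endif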
7031 /* Return 0 if the attributes for two types are incompatible, 1 if they
7032 are compatible, and 2 if they are nearly compatible (which causes a
7033 warning to be generated). */
7035 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
7039 /* Check for mismatch of non-default calling convention. */
7040 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7043 /* Check for mismatched call attributes. */
7044 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7045 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7046 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7047 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7049 /* Only bother to check if an attribute is defined. */
7050 if (l1
| l2
| s1
| s2
)
7052 /* If one type has an attribute, the other must have the same attribute. */
7053 if ((l1
!= l2
) || (s1
!= s2
))
7056 /* Disallow mixed attributes. */
7057 if ((l1
& s2
) || (l2
& s1
))
7061 /* Check for mismatched ISR attribute. */
7062 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7064 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7065 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7067 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7071 l1
= lookup_attribute ("cmse_nonsecure_call",
7072 TYPE_ATTRIBUTES (type1
)) != NULL
;
7073 l2
= lookup_attribute ("cmse_nonsecure_call",
7074 TYPE_ATTRIBUTES (type2
)) != NULL
;
7082 /* Assigns default attributes to newly defined type. This is used to
7083 set short_call/long_call attributes for function types of
7084 functions defined inside corresponding #pragma scopes. */
7086 arm_set_default_type_attributes (tree type
)
7088 /* Add __attribute__ ((long_call)) to all functions, when
7089 inside #pragma long_calls or __attribute__ ((short_call)),
7090 when inside #pragma no_long_calls. */
7091 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7093 tree type_attr_list
, attr_name
;
7094 type_attr_list
= TYPE_ATTRIBUTES (type
);
7096 if (arm_pragma_long_calls
== LONG
)
7097 attr_name
= get_identifier ("long_call");
7098 else if (arm_pragma_long_calls
== SHORT
)
7099 attr_name
= get_identifier ("short_call");
7103 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7104 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7108 /* Return true if DECL is known to be linked into section SECTION. */
7111 arm_function_in_section_p (tree decl
, section
*section
)
7113 /* We can only be certain about the prevailing symbol definition. */
7114 if (!decl_binds_to_current_def_p (decl
))
7117 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7118 if (!DECL_SECTION_NAME (decl
))
7120 /* Make sure that we will not create a unique section for DECL. */
7121 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7125 return function_section (decl
) == section
;
7128 /* Return nonzero if a 32-bit "long_call" should be generated for
7129 a call from the current function to DECL. We generate a long_call
7132 a. has an __attribute__((long call))
7133 or b. is within the scope of a #pragma long_calls
7134 or c. the -mlong-calls command line switch has been specified
7136 However we do not generate a long call if the function:
7138 d. has an __attribute__ ((short_call))
7139 or e. is inside the scope of a #pragma no_long_calls
7140 or f. is defined in the same section as the current function. */
7143 arm_is_long_call_p (tree decl
)
7148 return TARGET_LONG_CALLS
;
7150 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7151 if (lookup_attribute ("short_call", attrs
))
7154 /* For "f", be conservative, and only cater for cases in which the
7155 whole of the current function is placed in the same section. */
7156 if (!flag_reorder_blocks_and_partition
7157 && TREE_CODE (decl
) == FUNCTION_DECL
7158 && arm_function_in_section_p (decl
, current_function_section ()))
7161 if (lookup_attribute ("long_call", attrs
))
7164 return TARGET_LONG_CALLS
;
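/* Usage sketch (editor addition, not compiled): how rules a-f above combine
   when compiling with -mlong-calls.  The function names are invented for
   the example.  */
#if 0
extern void ext_fn (void);                                /* rule c: long  */
extern void near_fn (void) __attribute__ ((short_call));  /* rule d: short */
extern void far_fn (void) __attribute__ ((long_call));    /* rule a: long  */
static void local_fn (void) { }   /* rule f: usually same section -> short */
#endif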
7167 /* Return nonzero if it is ok to make a tail-call to DECL. */
7169 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7171 unsigned long func_type
;
7173 if (cfun
->machine
->sibcall_blocked
)
7176 /* Never tailcall something if we are generating code for Thumb-1. */
7180 /* The PIC register is live on entry to VxWorks PLT entries, so we
7181 must make the call before restoring the PIC register. */
7182 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7185 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7186 may be used both as target of the call and base register for restoring
7187 the VFP registers */
7188 if (TARGET_APCS_FRAME
&& TARGET_ARM
7189 && TARGET_HARD_FLOAT
7190 && decl
&& arm_is_long_call_p (decl
))
7193 /* If we are interworking and the function is not declared static
7194 then we can't tail-call it unless we know that it exists in this
7195 compilation unit (since it might be a Thumb routine). */
7196 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7197 && !TREE_ASM_WRITTEN (decl
))
7200 func_type
= arm_current_func_type ();
7201 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7202 if (IS_INTERRUPT (func_type
))
7205 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7206 generated for entry functions themselves. */
7207 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7210 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7211 this would complicate matters for later code generation. */
7212 if (TREE_CODE (exp
) == CALL_EXPR
)
7214 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7215 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7219 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7221 /* Check that the return value locations are the same. For
7222 example that we aren't returning a value from the sibling in
7223 a VFP register but then need to transfer it to a core
7226 tree decl_or_type
= decl
;
7228 /* If it is an indirect function pointer, get the function type. */
7230 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7232 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7233 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7235 if (!rtx_equal_p (a
, b
))
7239 /* Never tailcall if function may be called with a misaligned SP. */
7240 if (IS_STACKALIGN (func_type
))
7243 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7244 references should become a NOP. Don't convert such calls into
7246 if (TARGET_AAPCS_BASED
7247 && arm_abi
== ARM_ABI_AAPCS
7249 && DECL_WEAK (decl
))
7252 /* We cannot do a tailcall for an indirect call by descriptor if all the
7253 argument registers are used because the only register left to load the
7254 address is IP and it will already contain the static chain. */
7255 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7257 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7258 CUMULATIVE_ARGS cum
;
7259 cumulative_args_t cum_v
;
7261 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7262 cum_v
= pack_cumulative_args (&cum
);
7264 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7266 tree type
= TREE_VALUE (t
);
7267 if (!VOID_TYPE_P (type
))
7268 arm_function_arg_advance (cum_v
, TYPE_MODE (type
), type
, true);
7271 if (!arm_function_arg (cum_v
, SImode
, integer_type_node
, true))
7275 /* Everything else is ok. */
7280 /* Addressing mode support functions. */
/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
7296 /* Record that the current function needs a PIC register. Initialize
7297 cfun->machine->pic_reg if we have not already done so. */
7300 require_pic_register (void)
7302 /* A lot of the logic here is made obscure by the fact that this
7303 routine gets called as part of the rtx cost estimation process.
7304 We don't want those calls to affect any assumptions about the real
7305 function; and further, we can't call entry_of_function() until we
7306 start the real expansion process. */
7307 if (!crtl
->uses_pic_offset_table
)
7309 gcc_assert (can_create_pseudo_p ());
7310 if (arm_pic_register
!= INVALID_REGNUM
7311 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7313 if (!cfun
->machine
->pic_reg
)
7314 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7316 /* Play games to avoid marking the function as needing pic
7317 if we are being called as part of the cost-estimation
7319 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7320 crtl
->uses_pic_offset_table
= 1;
7324 rtx_insn
*seq
, *insn
;
7326 if (!cfun
->machine
->pic_reg
)
7327 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
7329 /* Play games to avoid marking the function as needing pic
7330 if we are being called as part of the cost-estimation
7332 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7334 crtl
->uses_pic_offset_table
= 1;
7337 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
7338 && arm_pic_register
> LAST_LO_REGNUM
)
7339 emit_move_insn (cfun
->machine
->pic_reg
,
7340 gen_rtx_REG (Pmode
, arm_pic_register
));
7342 arm_load_pic_register (0UL);
7347 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
7349 INSN_LOCATION (insn
) = prologue_location
;
7351 /* We can be called during expansion of PHI nodes, where
7352 we can't yet emit instructions directly in the final
7353 insn stream. Queue the insns on the entry edge, they will
7354 be committed after everything else is expanded. */
7355 insert_insn_on_edge (seq
,
7356 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
7363 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
7365 if (GET_CODE (orig
) == SYMBOL_REF
7366 || GET_CODE (orig
) == LABEL_REF
)
7370 gcc_assert (can_create_pseudo_p ());
7371 reg
= gen_reg_rtx (Pmode
);
7374 /* VxWorks does not impose a fixed gap between segments; the run-time
7375 gap can be different from the object-file gap. We therefore can't
7376 use GOTOFF unless we are absolutely sure that the symbol is in the
7377 same segment as the GOT. Unfortunately, the flexibility of linker
7378 scripts means that we can't be sure of that in general, so assume
7379 that GOTOFF is never valid on VxWorks. */
7380 /* References to weak symbols cannot be resolved locally: they
7381 may be overridden by a non-weak definition at link time. */
7383 if ((GET_CODE (orig
) == LABEL_REF
7384 || (GET_CODE (orig
) == SYMBOL_REF
7385 && SYMBOL_REF_LOCAL_P (orig
)
7386 && (SYMBOL_REF_DECL (orig
)
7387 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)))
7389 && arm_pic_data_is_text_relative
)
7390 insn
= arm_pic_static_addr (orig
, reg
);
7396 /* If this function doesn't have a pic register, create one now. */
7397 require_pic_register ();
7399 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
7401 /* Make the MEM as close to a constant as possible. */
7402 mem
= SET_SRC (pat
);
7403 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
7404 MEM_READONLY_P (mem
) = 1;
7405 MEM_NOTRAP_P (mem
) = 1;
7407 insn
= emit_insn (pat
);
7410 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7412 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
7416 else if (GET_CODE (orig
) == CONST
)
7420 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7421 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
7424 /* Handle the case where we have: const (UNSPEC_TLS). */
7425 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
7426 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
7429 /* Handle the case where we have:
7430 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7432 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7433 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
7434 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
7436 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
7442 gcc_assert (can_create_pseudo_p ());
7443 reg
= gen_reg_rtx (Pmode
);
7446 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
7448 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
7449 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
7450 base
== reg
? 0 : reg
);
7452 if (CONST_INT_P (offset
))
7454 /* The base register doesn't really matter, we only want to
7455 test the index for the appropriate mode. */
7456 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
7458 gcc_assert (can_create_pseudo_p ());
7459 offset
= force_reg (Pmode
, offset
);
7462 if (CONST_INT_P (offset
))
7463 return plus_constant (Pmode
, base
, INTVAL (offset
));
7466 if (GET_MODE_SIZE (mode
) > 4
7467 && (GET_MODE_CLASS (mode
) == MODE_INT
7468 || TARGET_SOFT_FLOAT
))
7470 emit_insn (gen_addsi3 (reg
, base
, offset
));
7474 return gen_rtx_PLUS (Pmode
, base
, offset
);
7481 /* Find a spare register to use during the prolog of a function. */
7484 thumb_find_work_register (unsigned long pushed_regs_mask
)
7488 /* Check the argument registers first as these are call-used. The
7489 register allocation order means that sometimes r3 might be used
7490 but earlier argument registers might not, so check them all. */
7491 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7492 if (!df_regs_ever_live_p (reg
))
7495 /* Before going on to check the call-saved registers we can try a couple
7496 more ways of deducing that r3 is available. The first is when we are
7497 pushing anonymous arguments onto the stack and we have less than 4
7498 registers worth of fixed arguments(*). In this case r3 will be part of
7499 the variable argument list and so we can be sure that it will be
7500 pushed right at the start of the function. Hence it will be available
7501 for the rest of the prologue.
7502 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7503 if (cfun
->machine
->uses_anonymous_args
7504 && crtl
->args
.pretend_args_size
> 0)
7505 return LAST_ARG_REGNUM
;
7507 /* The other case is when we have fixed arguments but less than 4 registers
7508 worth. In this case r3 might be used in the body of the function, but
7509 it is not being used to convey an argument into the function. In theory
7510 we could just check crtl->args.size to see how many bytes are
7511 being passed in argument registers, but it seems that it is unreliable.
7512 Sometimes it will have the value 0 when in fact arguments are being
7513 passed. (See testcase execute/20021111-1.c for an example). So we also
7514 check the args_info.nregs field as well. The problem with this field is
7515 that it makes no allowances for arguments that are passed to the
7516 function but which are not used. Hence we could miss an opportunity
7517 when a function has an unused argument in r3. But it is better to be
7518 safe than to be sorry. */
7519 if (! cfun
->machine
->uses_anonymous_args
7520 && crtl
->args
.size
>= 0
7521 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7522 && (TARGET_AAPCS_BASED
7523 ? crtl
->args
.info
.aapcs_ncrn
< 4
7524 : crtl
->args
.info
.nregs
< 4))
7525 return LAST_ARG_REGNUM
;
7527 /* Otherwise look for a call-saved register that is going to be pushed. */
7528 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7529 if (pushed_regs_mask
& (1 << reg
))
7534 /* Thumb-2 can use high regs. */
7535 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7536 if (pushed_regs_mask
& (1 << reg
))
7539 /* Something went wrong - thumb_compute_save_reg_mask()
7540 should have arranged for a suitable register to be pushed. */
7544 static GTY(()) int pic_labelno
;
7546 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7550 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7552 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7554 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7557 gcc_assert (flag_pic
);
7559 pic_reg
= cfun
->machine
->pic_reg
;
7560 if (TARGET_VXWORKS_RTP
)
7562 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7563 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7564 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7566 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7568 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7569 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7573 /* We use an UNSPEC rather than a LABEL_REF because this label
7574 never appears in the code stream. */
7576 labelno
= GEN_INT (pic_labelno
++);
7577 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7578 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7580 /* On the ARM the PC register contains 'dot + 8' at the time of the
7581 addition, on the Thumb it is 'dot + 4'. */
7582 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7583 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7585 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7589 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7591 else /* TARGET_THUMB1 */
7593 if (arm_pic_register
!= INVALID_REGNUM
7594 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7596 /* We will have pushed the pic register, so we should always be
7597 able to find a work register. */
7598 pic_tmp
= gen_rtx_REG (SImode
,
7599 thumb_find_work_register (saved_regs
));
7600 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7601 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7602 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7604 else if (arm_pic_register
!= INVALID_REGNUM
7605 && arm_pic_register
> LAST_LO_REGNUM
7606 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7608 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7609 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7610 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7613 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7617 /* Need to emit this whether or not we obey regdecls,
7618 since setjmp/longjmp can cause life info to screw up. */
7622 /* Generate code to load the address of a static var when flag_pic is set. */
7624 arm_pic_static_addr (rtx orig
, rtx reg
)
7626 rtx l1
, labelno
, offset_rtx
;
7628 gcc_assert (flag_pic
);
7630 /* We use an UNSPEC rather than a LABEL_REF because this label
7631 never appears in the code stream. */
7632 labelno
= GEN_INT (pic_labelno
++);
7633 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7634 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7636 /* On the ARM the PC register contains 'dot + 8' at the time of the
7637 addition, on the Thumb it is 'dot + 4'. */
7638 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7639 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7640 UNSPEC_SYMBOL_OFFSET
);
7641 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7643 return emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7646 /* Return nonzero if X is valid as an ARM state addressing register. */
7648 arm_address_register_rtx_p (rtx x
, int strict_p
)
7658 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7660 return (regno
<= LAST_ARM_REGNUM
7661 || regno
>= FIRST_PSEUDO_REGISTER
7662 || regno
== FRAME_POINTER_REGNUM
7663 || regno
== ARG_POINTER_REGNUM
);
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static bool
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return false;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}
7688 /* Return nonzero if X is a valid ARM state address operand. */
7690 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7694 enum rtx_code code
= GET_CODE (x
);
7696 if (arm_address_register_rtx_p (x
, strict_p
))
7699 use_ldrd
= (TARGET_LDRD
7700 && (mode
== DImode
|| mode
== DFmode
));
7702 if (code
== POST_INC
|| code
== PRE_DEC
7703 || ((code
== PRE_INC
|| code
== POST_DEC
)
7704 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7705 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7707 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7708 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7709 && GET_CODE (XEXP (x
, 1)) == PLUS
7710 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7712 rtx addend
= XEXP (XEXP (x
, 1), 1);
7714 /* Don't allow ldrd post increment by register because it's hard
7715 to fixup invalid register choices. */
7717 && GET_CODE (x
) == POST_MODIFY
7721 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7722 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7725 /* After reload constants split into minipools will have addresses
7726 from a LABEL_REF. */
7727 else if (reload_completed
7728 && (code
== LABEL_REF
7730 && GET_CODE (XEXP (x
, 0)) == PLUS
7731 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7732 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7735 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7738 else if (code
== PLUS
)
7740 rtx xop0
= XEXP (x
, 0);
7741 rtx xop1
= XEXP (x
, 1);
7743 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7744 && ((CONST_INT_P (xop1
)
7745 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7746 || (!strict_p
&& will_be_in_index_register (xop1
))))
7747 || (arm_address_register_rtx_p (xop1
, strict_p
)
7748 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7752 /* Reload currently can't handle MINUS, so disable this for now */
7753 else if (GET_CODE (x
) == MINUS
)
7755 rtx xop0
= XEXP (x
, 0);
7756 rtx xop1
= XEXP (x
, 1);
7758 return (arm_address_register_rtx_p (xop0
, strict_p
)
7759 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7763 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7764 && code
== SYMBOL_REF
7765 && CONSTANT_POOL_ADDRESS_P (x
)
7767 && symbol_mentioned_p (get_pool_constant (x
))
7768 && ! pcrel_constant_p (get_pool_constant (x
))))
7774 /* Return true if we can avoid creating a constant pool entry for x. */
7776 can_avoid_literal_pool_for_label_p (rtx x
)
7778 /* Normally we can assign constant values to target registers without
7779 the help of constant pool. But there are cases we have to use constant
7781 1) assign a label to register.
7782 2) sign-extend a 8bit value to 32bit and then assign to register.
7784 Constant pool access in format:
7785 (set (reg r0) (mem (symbol_ref (".LC0"))))
7786 will cause the use of literal pool (later in function arm_reorg).
7787 So here we mark such format as an invalid format, then the compiler
7788 will adjust it into:
7789 (set (reg r0) (symbol_ref (".LC0")))
7790 (set (reg r0) (mem (reg r0))).
7791 No extra register is required, and (mem (reg r0)) won't cause the use
7792 of literal pools. */
7793 if (arm_disable_literal_pool
&& GET_CODE (x
) == SYMBOL_REF
7794 && CONSTANT_POOL_ADDRESS_P (x
))
7800 /* Return nonzero if X is a valid Thumb-2 address operand. */
7802 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7805 enum rtx_code code
= GET_CODE (x
);
7807 if (arm_address_register_rtx_p (x
, strict_p
))
7810 use_ldrd
= (TARGET_LDRD
7811 && (mode
== DImode
|| mode
== DFmode
));
7813 if (code
== POST_INC
|| code
== PRE_DEC
7814 || ((code
== PRE_INC
|| code
== POST_DEC
)
7815 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7816 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7818 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7819 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7820 && GET_CODE (XEXP (x
, 1)) == PLUS
7821 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7823 /* Thumb-2 only has autoincrement by constant. */
7824 rtx addend
= XEXP (XEXP (x
, 1), 1);
7825 HOST_WIDE_INT offset
;
7827 if (!CONST_INT_P (addend
))
7830 offset
= INTVAL(addend
);
7831 if (GET_MODE_SIZE (mode
) <= 4)
7832 return (offset
> -256 && offset
< 256);
7834 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7835 && (offset
& 3) == 0);
7838 /* After reload constants split into minipools will have addresses
7839 from a LABEL_REF. */
7840 else if (reload_completed
7841 && (code
== LABEL_REF
7843 && GET_CODE (XEXP (x
, 0)) == PLUS
7844 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7845 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7848 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7851 else if (code
== PLUS
)
7853 rtx xop0
= XEXP (x
, 0);
7854 rtx xop1
= XEXP (x
, 1);
7856 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7857 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7858 || (!strict_p
&& will_be_in_index_register (xop1
))))
7859 || (arm_address_register_rtx_p (xop1
, strict_p
)
7860 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7863 else if (can_avoid_literal_pool_for_label_p (x
))
7866 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7867 && code
== SYMBOL_REF
7868 && CONSTANT_POOL_ADDRESS_P (x
)
7870 && symbol_mentioned_p (get_pool_constant (x
))
7871 && ! pcrel_constant_p (get_pool_constant (x
))))
7877 /* Return nonzero if INDEX is valid for an address index operand in
7880 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7883 HOST_WIDE_INT range
;
7884 enum rtx_code code
= GET_CODE (index
);
7886 /* Standard coprocessor addressing modes. */
7887 if (TARGET_HARD_FLOAT
7888 && (mode
== SFmode
|| mode
== DFmode
))
7889 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7890 && INTVAL (index
) > -1024
7891 && (INTVAL (index
) & 3) == 0);
  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
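  /* Worked example (added note): a quad-register move of 16 bytes at offset
     INDEX is split into two double-register accesses at INDEX and INDEX+8,
     so both must fit the word-aligned double-mode form; hence the upper
     bound below is 1016 (= 1024 - 8) rather than 1024.  */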
7898 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7899 return (code
== CONST_INT
7900 && INTVAL (index
) < 1016
7901 && INTVAL (index
) > -1024
7902 && (INTVAL (index
) & 3) == 0);
7904 /* We have no such constraint on double mode offsets, so we permit the
7905 full range of the instruction format. */
7906 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7907 return (code
== CONST_INT
7908 && INTVAL (index
) < 1024
7909 && INTVAL (index
) > -1024
7910 && (INTVAL (index
) & 3) == 0);
7912 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7913 return (code
== CONST_INT
7914 && INTVAL (index
) < 1024
7915 && INTVAL (index
) > -1024
7916 && (INTVAL (index
) & 3) == 0);
7918 if (arm_address_register_rtx_p (index
, strict_p
)
7919 && (GET_MODE_SIZE (mode
) <= 4))
7922 if (mode
== DImode
|| mode
== DFmode
)
7924 if (code
== CONST_INT
)
7926 HOST_WIDE_INT val
= INTVAL (index
);
7929 return val
> -256 && val
< 256;
7931 return val
> -4096 && val
< 4092;
7934 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7937 if (GET_MODE_SIZE (mode
) <= 4
7941 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7945 rtx xiop0
= XEXP (index
, 0);
7946 rtx xiop1
= XEXP (index
, 1);
7948 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7949 && power_of_two_operand (xiop1
, SImode
))
7950 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7951 && power_of_two_operand (xiop0
, SImode
)));
7953 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7954 || code
== ASHIFT
|| code
== ROTATERT
)
7956 rtx op
= XEXP (index
, 1);
7958 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7961 && INTVAL (op
) <= 31);
7965 /* For ARM v4 we may be doing a sign-extend operation during the
7971 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7977 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7979 return (code
== CONST_INT
7980 && INTVAL (index
) < range
7981 && INTVAL (index
) > -range
);
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL (op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
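/* Sketch for illustration (not in the original file): the scale factors
   accepted above are exactly the powers of two expressible as an LSL #0..#3
   on the index register, so an equivalent test is "nonzero power of two no
   larger than 8".  The helper name is invented.  */

static inline int
sketch_valid_thumb2_scale (unsigned long val)
{
  /* val & (val - 1) clears the lowest set bit; the result is zero only
     for powers of two (and for zero itself, excluded by the first test).  */
  return val != 0 && (val & (val - 1)) == 0 && val <= 8;
}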
7998 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8000 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
8002 enum rtx_code code
= GET_CODE (index
);
8004 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8005 /* Standard coprocessor addressing modes. */
8006 if (TARGET_HARD_FLOAT
8007 && (mode
== SFmode
|| mode
== DFmode
))
8008 return (code
== CONST_INT
&& INTVAL (index
) < 1024
    /* Thumb-2 allows only > -256 index range for its core register
       load/stores.  Since we allow SF/DF in core registers, we have
       to use the intersection between -256~4096 (core) and -1024~1024
       (coprocessor).  */
8013 && INTVAL (index
) > -256
8014 && (INTVAL (index
) & 3) == 0);
8016 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8018 /* For DImode assume values will usually live in core regs
8019 and only allow LDRD addressing modes. */
8020 if (!TARGET_LDRD
|| mode
!= DImode
)
8021 return (code
== CONST_INT
8022 && INTVAL (index
) < 1024
8023 && INTVAL (index
) > -1024
8024 && (INTVAL (index
) & 3) == 0);
  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
8032 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8033 return (code
== CONST_INT
8034 && INTVAL (index
) < 1016
8035 && INTVAL (index
) > -1024
8036 && (INTVAL (index
) & 3) == 0);
8038 /* We have no such constraint on double mode offsets, so we permit the
8039 full range of the instruction format. */
8040 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8041 return (code
== CONST_INT
8042 && INTVAL (index
) < 1024
8043 && INTVAL (index
) > -1024
8044 && (INTVAL (index
) & 3) == 0);
8046 if (arm_address_register_rtx_p (index
, strict_p
)
8047 && (GET_MODE_SIZE (mode
) <= 4))
8050 if (mode
== DImode
|| mode
== DFmode
)
8052 if (code
== CONST_INT
)
8054 HOST_WIDE_INT val
= INTVAL (index
);
8055 /* ??? Can we assume ldrd for thumb2? */
8056 /* Thumb-2 ldrd only has reg+const addressing modes. */
8057 /* ldrd supports offsets of +-1020.
8058 However the ldr fallback does not. */
8059 return val
> -256 && val
< 256 && (val
& 3) == 0;
8067 rtx xiop0
= XEXP (index
, 0);
8068 rtx xiop1
= XEXP (index
, 1);
8070 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8071 && thumb2_index_mul_operand (xiop1
))
8072 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8073 && thumb2_index_mul_operand (xiop0
)));
8075 else if (code
== ASHIFT
)
8077 rtx op
= XEXP (index
, 1);
8079 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8082 && INTVAL (op
) <= 3);
8085 return (code
== CONST_INT
8086 && INTVAL (index
) < 4096
8087 && INTVAL (index
) > -256);
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}
/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
8122 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8124 The AP may be eliminated to either the SP or the FP, so we use the
8125 least common denominator, e.g. SImode, and offsets from 0 to 64.
8127 ??? Verify whether the above is the right approach.
8129 ??? Also, the FP may be eliminated to the SP, so perhaps that
8130 needs special handling also.
8132 ??? Look at how the mips16 port solves this problem. It probably uses
8133 better ways to solve some of these problems.
8135 Although it is not incorrect, we don't accept QImode and HImode
8136 addresses based on the frame pointer or arg pointer until the
8137 reload pass starts. This is so that eliminating such addresses
8138 into stack based ones won't produce impossible code. */
8140 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8142 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
8145 /* ??? Not clear if this is right. Experiment. */
8146 if (GET_MODE_SIZE (mode
) < 4
8147 && !(reload_in_progress
|| reload_completed
)
8148 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8149 || reg_mentioned_p (arg_pointer_rtx
, x
)
8150 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8151 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8152 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8153 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8156 /* Accept any base register. SP only in SImode or larger. */
8157 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8160 /* This is PC relative data before arm_reorg runs. */
8161 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
8162 && GET_CODE (x
) == SYMBOL_REF
8163 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
8166 /* This is PC relative data after arm_reorg runs. */
8167 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
8169 && (GET_CODE (x
) == LABEL_REF
8170 || (GET_CODE (x
) == CONST
8171 && GET_CODE (XEXP (x
, 0)) == PLUS
8172 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8173 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8176 /* Post-inc indexing only supported for SImode and larger. */
8177 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
8178 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
8181 else if (GET_CODE (x
) == PLUS
)
8183 /* REG+REG address can be any two index registers. */
8184 /* We disallow FRAME+REG addressing since we know that FRAME
8185 will be replaced with STACK, and SP relative addressing only
8186 permits SP+OFFSET. */
8187 if (GET_MODE_SIZE (mode
) <= 4
8188 && XEXP (x
, 0) != frame_pointer_rtx
8189 && XEXP (x
, 1) != frame_pointer_rtx
8190 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8191 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
8192 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
8195 /* REG+const has 5-7 bit offset for non-SP registers. */
8196 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8197 || XEXP (x
, 0) == arg_pointer_rtx
)
8198 && CONST_INT_P (XEXP (x
, 1))
8199 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8202 /* REG+const has 10-bit offset for SP, but only SImode and
8203 larger is supported. */
8204 /* ??? Should probably check for DI/DFmode overflow here
8205 just like GO_IF_LEGITIMATE_OFFSET does. */
8206 else if (REG_P (XEXP (x
, 0))
8207 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
8208 && GET_MODE_SIZE (mode
) >= 4
8209 && CONST_INT_P (XEXP (x
, 1))
8210 && INTVAL (XEXP (x
, 1)) >= 0
8211 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
8212 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8215 else if (REG_P (XEXP (x
, 0))
8216 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
8217 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
8218 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
8219 && REGNO (XEXP (x
, 0))
8220 <= LAST_VIRTUAL_POINTER_REGISTER
))
8221 && GET_MODE_SIZE (mode
) >= 4
8222 && CONST_INT_P (XEXP (x
, 1))
8223 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8227 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8228 && GET_MODE_SIZE (mode
) == 4
8229 && GET_CODE (x
) == SYMBOL_REF
8230 && CONSTANT_POOL_ADDRESS_P (x
)
8232 && symbol_mentioned_p (get_pool_constant (x
))
8233 && ! pcrel_constant_p (get_pool_constant (x
))))
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
bool
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
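/* Standalone sketch (not part of the original sources) of the ranges the
   function above accepts: byte accesses take a 5-bit offset, halfword
   accesses a 5-bit offset scaled by 2, and word (or larger) accesses a
   word-aligned offset with the whole access kept below 128 bytes.  The
   helper name and its plain "size" parameter are invented for this
   example.  */

static inline int
sketch_thumb1_offset_ok (unsigned int size, long val)
{
  if (size == 1)
    return val >= 0 && val < 32;
  if (size == 2)
    return val >= 0 && val < 64 && (val & 1) == 0;
  return val >= 0 && val + (long) size <= 128 && (val & 3) == 0;
}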
static bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
        return LO_REGS;
      else
        return rclass;
    }
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
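/* Illustrative sketch only: the function above follows the usual
   create-on-first-use pattern for a cached handle.  The stripped-down
   version below shows the same shape with invented names and a generic
   constructor callback.  */

static void *
sketch_get_cached_handle (void **cache, void *(*make) (void))
{
  if (!*cache)
    *cache = make ();   /* built once, then reused on every later call */
  return *cache;
}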
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}

static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, 1, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
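/* Sketch for illustration (not original code): the "8 : 4" constant folded
   into the UNSPEC above accounts for the fact that reading the program
   counter yields the instruction address plus 8 in ARM state and plus 4 in
   Thumb state, so the assembled addend cancels that read-ahead.  The helper
   name is invented.  */

static inline int
sketch_pc_read_ahead_bytes (int arm_state)
{
  return arm_state ? 8 : 4;
}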
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
8402 legitimize_tls_address (rtx x
, rtx reg
)
8404 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
8406 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
8410 case TLS_MODEL_GLOBAL_DYNAMIC
:
8411 if (TARGET_GNU2_TLS
)
8413 reg
= arm_tls_descseq_addr (x
, reg
);
8415 tp
= arm_load_tp (NULL_RTX
);
8417 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8421 /* Original scheme */
8422 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
8423 dest
= gen_reg_rtx (Pmode
);
8424 emit_libcall_block (insns
, dest
, ret
, x
);
8428 case TLS_MODEL_LOCAL_DYNAMIC
:
8429 if (TARGET_GNU2_TLS
)
8431 reg
= arm_tls_descseq_addr (x
, reg
);
8433 tp
= arm_load_tp (NULL_RTX
);
8435 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8439 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
8441 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8442 share the LDM result with other LD model accesses. */
8443 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
8445 dest
= gen_reg_rtx (Pmode
);
8446 emit_libcall_block (insns
, dest
, ret
, eqv
);
8448 /* Load the addend. */
8449 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
8450 GEN_INT (TLS_LDO32
)),
8452 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
8453 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
8457 case TLS_MODEL_INITIAL_EXEC
:
8458 labelno
= GEN_INT (pic_labelno
++);
8459 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8460 label
= gen_rtx_CONST (VOIDmode
, label
);
8461 sum
= gen_rtx_UNSPEC (Pmode
,
8462 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
8463 GEN_INT (TARGET_ARM
? 8 : 4)),
8465 reg
= load_tls_operand (sum
, reg
);
8468 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
8469 else if (TARGET_THUMB2
)
8470 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
8473 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8474 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8477 tp
= arm_load_tp (NULL_RTX
);
8479 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8481 case TLS_MODEL_LOCAL_EXEC
:
8482 tp
= arm_load_tp (NULL_RTX
);
8484 reg
= gen_rtx_UNSPEC (Pmode
,
8485 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8487 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8489 return gen_rtx_PLUS (Pmode
, tp
, reg
);
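/* Sketch for illustration (not part of the original sources): in the
   local-exec model built just above, the final address is simply the
   thread pointer plus a link-time constant offset, which is what the PLUS
   of TP and the TLS_LE32 unspec expresses.  The helper below shows that
   arithmetic with invented names.  */

static inline char *
sketch_local_exec_address (char *thread_pointer, long tp_offset)
{
  return thread_pointer + tp_offset;
}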
8496 /* Try machine-dependent ways of modifying an illegitimate address
8497 to be legitimate. If we find one, return the new, valid address. */
8499 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8501 if (arm_tls_referenced_p (x
))
8505 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8507 addend
= XEXP (XEXP (x
, 0), 1);
8508 x
= XEXP (XEXP (x
, 0), 0);
8511 if (GET_CODE (x
) != SYMBOL_REF
)
8514 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8516 x
= legitimize_tls_address (x
, NULL_RTX
);
8520 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8529 /* TODO: legitimize_address for Thumb2. */
8532 return thumb_legitimize_address (x
, orig_x
, mode
);
8535 if (GET_CODE (x
) == PLUS
)
8537 rtx xop0
= XEXP (x
, 0);
8538 rtx xop1
= XEXP (x
, 1);
8540 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8541 xop0
= force_reg (SImode
, xop0
);
8543 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8544 && !symbol_mentioned_p (xop1
))
8545 xop1
= force_reg (SImode
, xop1
);
8547 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8548 && CONST_INT_P (xop1
))
8550 HOST_WIDE_INT n
, low_n
;
8554 /* VFP addressing modes actually allow greater offsets, but for
8555 now we just stick with the lowest common denominator. */
8556 if (mode
== DImode
|| mode
== DFmode
)
8568 low_n
= ((mode
) == TImode
? 0
8569 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8573 base_reg
= gen_reg_rtx (SImode
);
8574 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8575 emit_move_insn (base_reg
, val
);
8576 x
= plus_constant (Pmode
, base_reg
, low_n
);
8578 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8579 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8582 /* XXX We don't allow MINUS any more -- see comment in
8583 arm_legitimate_address_outer_p (). */
8584 else if (GET_CODE (x
) == MINUS
)
8586 rtx xop0
= XEXP (x
, 0);
8587 rtx xop1
= XEXP (x
, 1);
8589 if (CONSTANT_P (xop0
))
8590 xop0
= force_reg (SImode
, xop0
);
8592 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8593 xop1
= force_reg (SImode
, xop1
);
8595 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8596 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8599 /* Make sure to take full advantage of the pre-indexed addressing mode
8600 with absolute addresses which often allows for the base register to
8601 be factorized for multiple adjacent memory references, and it might
8602 even allows for the mini pool to be avoided entirely. */
8603 else if (CONST_INT_P (x
) && optimize
> 0)
8606 HOST_WIDE_INT mask
, base
, index
;
8609 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8610 use a 8-bit index. So let's use a 12-bit index for SImode only and
8611 hope that arm_gen_constant will enable ldrb to use more bits. */
8612 bits
= (mode
== SImode
) ? 12 : 8;
8613 mask
= (1 << bits
) - 1;
8614 base
= INTVAL (x
) & ~mask
;
8615 index
= INTVAL (x
) & mask
;
8616 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8618 /* It'll most probably be more efficient to generate the base
8619 with more bits set and use a negative index instead. */
8623 base_reg
= force_reg (SImode
, GEN_INT (base
));
8624 x
= plus_constant (Pmode
, base_reg
, index
);
8629 /* We need to find and carefully transform any SYMBOL and LABEL
8630 references; so go back to the original address expression. */
8631 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8633 if (new_x
!= orig_x
)
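/* Illustrative sketch (not original code) of the CONST_INT handling a few
   lines above: an absolute address is split into a high part that goes
   into a base register and a low part kept as the load offset (12 bits for
   SImode, 8 bits otherwise), so neighbouring accesses can share the base.
   The helper name is invented.  */

static inline void
sketch_split_absolute_address (long addr, int bits, long *base, long *index)
{
  long mask = (1L << bits) - 1;
  *index = addr & mask;         /* kept in the addressing mode */
  *base = addr & ~mask;         /* forced into a base register */
}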
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
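/* Standalone sketch (not part of the original sources) of the rebiasing
   idea used above for large positive offsets: move most of the offset into
   the base register and keep a small remainder that the addressing mode
   itself can encode.  The helper and its "limit" parameter are invented;
   it simply splits a non-negative OFFSET into a base adjustment and an
   in-range remainder.  */

static inline void
sketch_rebias_offset (long offset, long limit,
                      long *base_adjust, long *remainder)
{
  *remainder = offset % limit;          /* what the load itself encodes */
  *base_adjust = offset - *remainder;   /* folded into the base register */
}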
/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
        {
          /* ARM currently does not provide relocations to encode TLS variables
             into AArch32 instructions, only data, so there is no way to
             currently implement these if a literal pool is disabled.  */
          if (arm_disable_literal_pool)
            sorry ("accessing thread-local storage is not currently supported "
                   "with -mpure-code or -mslow-flash-data");

          return true;
        }

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
         TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
        iter.skip_subrtxes ();
    }
  return false;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    return true;

  return (CONST_INT_P (x)
          || CONST_DOUBLE_P (x)
          || CONSTANT_ADDRESS_P (x)
          || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
          || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}

#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
8804 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8806 machine_mode mode
= GET_MODE (x
);
8815 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8822 return COSTS_N_INSNS (1);
8825 if (arm_arch6m
&& arm_m_profile_small_mul
)
8826 return COSTS_N_INSNS (32);
8828 if (CONST_INT_P (XEXP (x
, 1)))
8831 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8838 return COSTS_N_INSNS (2) + cycles
;
8840 return COSTS_N_INSNS (1) + 16;
8843 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8845 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8846 return (COSTS_N_INSNS (words
)
8847 + 4 * ((MEM_P (SET_SRC (x
)))
8848 + MEM_P (SET_DEST (x
))));
8853 if (UINTVAL (x
) < 256
8854 /* 16-bit constant. */
8855 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8857 if (thumb_shiftable_const (INTVAL (x
)))
8858 return COSTS_N_INSNS (2);
8859 return COSTS_N_INSNS (3);
8861 else if ((outer
== PLUS
|| outer
== COMPARE
)
8862 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8864 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8865 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8866 return COSTS_N_INSNS (1);
8867 else if (outer
== AND
)
8870 /* This duplicates the tests in the andsi3 expander. */
8871 for (i
= 9; i
<= 31; i
++)
8872 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8873 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8874 return COSTS_N_INSNS (2);
8876 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8877 || outer
== LSHIFTRT
)
8879 return COSTS_N_INSNS (2);
8885 return COSTS_N_INSNS (3);
8903 /* XXX another guess. */
8904 /* Memory costs quite a lot for the first word, but subsequent words
8905 load at the equivalent of a single insn each. */
8906 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8907 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8912 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8918 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8919 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8925 return total
+ COSTS_N_INSNS (1);
8927 /* Assume a two-shift sequence. Increase the cost slightly so
8928 we prefer actual shifts over an extend operation. */
8929 return total
+ 1 + COSTS_N_INSNS (2);
8936 /* Estimates the size cost of thumb1 instructions.
8937 For now most of the code is copied from thumb1_rtx_costs. We need more
8938 fine grain tuning when we have more related test cases. */
8940 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8942 machine_mode mode
= GET_MODE (x
);
8951 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8955 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8956 defined by RTL expansion, especially for the expansion of
8958 if ((GET_CODE (XEXP (x
, 0)) == MULT
8959 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8960 || (GET_CODE (XEXP (x
, 1)) == MULT
8961 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8962 return COSTS_N_INSNS (2);
8967 return COSTS_N_INSNS (1);
8970 if (CONST_INT_P (XEXP (x
, 1)))
8972 /* Thumb1 mul instruction can't operate on const. We must Load it
8973 into a register first. */
8974 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8975 /* For the targets which have a very small and high-latency multiply
8976 unit, we prefer to synthesize the mult with up to 5 instructions,
8977 giving a good balance between size and performance. */
8978 if (arm_arch6m
&& arm_m_profile_small_mul
)
8979 return COSTS_N_INSNS (5);
8981 return COSTS_N_INSNS (1) + const_size
;
8983 return COSTS_N_INSNS (1);
8986 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8988 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8989 cost
= COSTS_N_INSNS (words
);
8990 if (satisfies_constraint_J (SET_SRC (x
))
8991 || satisfies_constraint_K (SET_SRC (x
))
8992 /* Too big an immediate for a 2-byte mov, using MOVT. */
8993 || (CONST_INT_P (SET_SRC (x
))
8994 && UINTVAL (SET_SRC (x
)) >= 256
8996 && satisfies_constraint_j (SET_SRC (x
)))
8997 /* thumb1_movdi_insn. */
8998 || ((words
> 1) && MEM_P (SET_SRC (x
))))
8999 cost
+= COSTS_N_INSNS (1);
9005 if (UINTVAL (x
) < 256)
9006 return COSTS_N_INSNS (1);
9007 /* movw is 4byte long. */
9008 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
9009 return COSTS_N_INSNS (2);
9010 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9011 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9012 return COSTS_N_INSNS (2);
9013 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9014 if (thumb_shiftable_const (INTVAL (x
)))
9015 return COSTS_N_INSNS (2);
9016 return COSTS_N_INSNS (3);
9018 else if ((outer
== PLUS
|| outer
== COMPARE
)
9019 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9021 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9022 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9023 return COSTS_N_INSNS (1);
9024 else if (outer
== AND
)
9027 /* This duplicates the tests in the andsi3 expander. */
9028 for (i
= 9; i
<= 31; i
++)
9029 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9030 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9031 return COSTS_N_INSNS (2);
9033 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9034 || outer
== LSHIFTRT
)
9036 return COSTS_N_INSNS (2);
9042 return COSTS_N_INSNS (3);
9056 return COSTS_N_INSNS (1);
9059 return (COSTS_N_INSNS (1)
9061 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9062 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9063 ? COSTS_N_INSNS (1) : 0));
9067 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9072 /* XXX still guessing. */
9073 switch (GET_MODE (XEXP (x
, 0)))
9076 return (1 + (mode
== DImode
? 4 : 0)
9077 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9080 return (4 + (mode
== DImode
? 4 : 0)
9081 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9084 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
           || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
        *shift_reg = XEXP (op, 1);

      return XEXP (op, 0);
    }

  return NULL;
}
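/* Standalone sketch (not original code): the MULT case above treats a
   multiply by a power of two as a left shift, because (mult x 2^n) is how
   such shifts appear inside addressing and ALU operands.  The helper below
   shows the power-of-two-to-shift-count mapping with an invented name.  */

static inline int
sketch_mult_to_shift_count (unsigned long factor)
{
  int count = 0;

  if (factor == 0 || (factor & (factor - 1)) != 0)
    return -1;          /* not a power of two: not a shifter operand */
  while ((factor >>= 1) != 0)
    count++;
  return count;         /* 2^count == original factor */
}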
9121 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9123 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9124 rtx_code code
= GET_CODE (x
);
9125 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9127 switch (XINT (x
, 1))
9129 case UNSPEC_UNALIGNED_LOAD
:
9130 /* We can only do unaligned loads into the integer unit, and we can't
9132 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9134 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9135 + extra_cost
->ldst
.load_unaligned
);
9138 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9139 ADDR_SPACE_GENERIC
, speed_p
);
9143 case UNSPEC_UNALIGNED_STORE
:
9144 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9146 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9147 + extra_cost
->ldst
.store_unaligned
);
9149 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9151 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9152 ADDR_SPACE_GENERIC
, speed_p
);
9163 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9167 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
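/* Worked example (added note, assuming the usual COSTS_N_INSNS (N) == N * 4
   scaling): a two-argument libcall is costed as LIBCALL_COST (2)
   = COSTS_N_INSNS (2 + 18) = 80 when optimizing for speed, but only
   COSTS_N_INSNS (2 + 2) = 16 when optimizing for size, so libcalls are
   penalized much more heavily in speed builds relative to the inline
   sequences they are compared against.  */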
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
9204 /* RTX costs. Make an estimate of the cost of executing the operation
9205 X, which is contained with an operation with code OUTER_CODE.
9206 SPEED_P indicates whether the cost desired is the performance cost,
9207 or the size cost. The estimate is stored in COST and the return
9208 value is TRUE if the cost calculation is final, or FALSE if the
9209 caller should recurse through the operands of X to add additional
9212 We currently make no attempt to model the size savings of Thumb-2
9213 16-bit instructions. At the normal points in compilation where
9214 this code is called we have no measure of whether the condition
9215 flags are live or not, and thus no realistic way to determine what
9216 the size will eventually be. */
9218 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9219 const struct cpu_cost_table
*extra_cost
,
9220 int *cost
, bool speed_p
)
9222 machine_mode mode
= GET_MODE (x
);
9224 *cost
= COSTS_N_INSNS (1);
9229 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9231 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9239 /* SET RTXs don't have a mode so we get it from the destination. */
9240 mode
= GET_MODE (SET_DEST (x
));
9242 if (REG_P (SET_SRC (x
))
9243 && REG_P (SET_DEST (x
)))
9245 /* Assume that most copies can be done with a single insn,
9246 unless we don't have HW FP, in which case everything
9247 larger than word mode will require two insns. */
9248 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9249 && GET_MODE_SIZE (mode
) > 4)
9252 /* Conditional register moves can be encoded
9253 in 16 bits in Thumb mode. */
9254 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9260 if (CONST_INT_P (SET_SRC (x
)))
9262 /* Handle CONST_INT here, since the value doesn't have a mode
9263 and we would otherwise be unable to work out the true cost. */
9264 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9267 /* Slightly lower the cost of setting a core reg to a constant.
9268 This helps break up chains and allows for better scheduling. */
9269 if (REG_P (SET_DEST (x
))
9270 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9273 /* Immediate moves with an immediate in the range [0, 255] can be
9274 encoded in 16 bits in Thumb mode. */
9275 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9276 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9278 goto const_int_cost
;
9284 /* A memory access costs 1 insn if the mode is small, or the address is
9285 a single register, otherwise it costs one insn per word. */
9286 if (REG_P (XEXP (x
, 0)))
9287 *cost
= COSTS_N_INSNS (1);
9289 && GET_CODE (XEXP (x
, 0)) == PLUS
9290 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9291 /* This will be split into two instructions.
9292 See arm.md:calculate_pic_address. */
9293 *cost
= COSTS_N_INSNS (2);
9295 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9297 /* For speed optimizations, add the costs of the address and
9298 accessing memory. */
9301 *cost
+= (extra_cost
->ldst
.load
9302 + arm_address_cost (XEXP (x
, 0), mode
,
9303 ADDR_SPACE_GENERIC
, speed_p
));
9305 *cost
+= extra_cost
->ldst
.load
;
9311 /* Calculations of LDM costs are complex. We assume an initial cost
9312 (ldm_1st) which will load the number of registers mentioned in
9313 ldm_regs_per_insn_1st registers; then each additional
9314 ldm_regs_per_insn_subsequent registers cost one more insn. The
9315 formula for N regs is thus:
9317 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9318 + ldm_regs_per_insn_subsequent - 1)
9319 / ldm_regs_per_insn_subsequent).
9321 Additional costs may also be added for addressing. A similar
9322 formula is used for STM. */
9324 bool is_ldm
= load_multiple_operation (x
, SImode
);
9325 bool is_stm
= store_multiple_operation (x
, SImode
);
9327 if (is_ldm
|| is_stm
)
9331 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9332 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9333 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9334 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9335 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9336 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9337 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9339 *cost
+= regs_per_insn_1st
9340 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9341 + regs_per_insn_sub
- 1)
9342 / regs_per_insn_sub
);
9351 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9352 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9353 *cost
+= COSTS_N_INSNS (speed_p
9354 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9355 else if (mode
== SImode
&& TARGET_IDIV
)
9356 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9358 *cost
= LIBCALL_COST (2);
  /* Make the cost of sdiv more expensive so when both sdiv and udiv are
     possible udiv is preferred.  */
9362 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
9363 return false; /* All arguments must be in registers. */
9366 /* MOD by a power of 2 can be expanded as:
9368 and r0, r0, #(n - 1)
9369 and r1, r1, #(n - 1)
9370 rsbpl r0, r1, #0. */
9371 if (CONST_INT_P (XEXP (x
, 1))
9372 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9375 *cost
+= COSTS_N_INSNS (3);
9378 *cost
+= 2 * extra_cost
->alu
.logical
9379 + extra_cost
->alu
.arith
;
  /* Make the cost of sdiv more expensive so when both sdiv and udiv are
     possible udiv is preferred.  */
9387 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
9388 return false; /* All arguments must be in registers. */
9391 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9393 *cost
+= (COSTS_N_INSNS (1)
9394 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9396 *cost
+= extra_cost
->alu
.shift_reg
;
9404 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9406 *cost
+= (COSTS_N_INSNS (2)
9407 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9409 *cost
+= 2 * extra_cost
->alu
.shift
;
9412 else if (mode
== SImode
)
9414 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9415 /* Slightly disparage register shifts at -Os, but not by much. */
9416 if (!CONST_INT_P (XEXP (x
, 1)))
9417 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9418 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9421 else if (GET_MODE_CLASS (mode
) == MODE_INT
9422 && GET_MODE_SIZE (mode
) < 4)
9426 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9427 /* Slightly disparage register shifts at -Os, but not by
9429 if (!CONST_INT_P (XEXP (x
, 1)))
9430 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9431 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9433 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9435 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9437 /* Can use SBFX/UBFX. */
9439 *cost
+= extra_cost
->alu
.bfx
;
9440 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9444 *cost
+= COSTS_N_INSNS (1);
9445 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9448 if (CONST_INT_P (XEXP (x
, 1)))
9449 *cost
+= 2 * extra_cost
->alu
.shift
;
9451 *cost
+= (extra_cost
->alu
.shift
9452 + extra_cost
->alu
.shift_reg
);
9455 /* Slightly disparage register shifts. */
9456 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9461 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9462 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9465 if (CONST_INT_P (XEXP (x
, 1)))
9466 *cost
+= (2 * extra_cost
->alu
.shift
9467 + extra_cost
->alu
.log_shift
);
9469 *cost
+= (extra_cost
->alu
.shift
9470 + extra_cost
->alu
.shift_reg
9471 + extra_cost
->alu
.log_shift_reg
);
9477 *cost
= LIBCALL_COST (2);
9486 *cost
+= extra_cost
->alu
.rev
;
9493 /* No rev instruction available. Look at arm_legacy_rev
9494 and thumb_legacy_rev for the form of RTL used then. */
9497 *cost
+= COSTS_N_INSNS (9);
9501 *cost
+= 6 * extra_cost
->alu
.shift
;
9502 *cost
+= 3 * extra_cost
->alu
.logical
;
9507 *cost
+= COSTS_N_INSNS (4);
9511 *cost
+= 2 * extra_cost
->alu
.shift
;
9512 *cost
+= extra_cost
->alu
.arith_shift
;
9513 *cost
+= 2 * extra_cost
->alu
.logical
;
9521 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9522 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9524 if (GET_CODE (XEXP (x
, 0)) == MULT
9525 || GET_CODE (XEXP (x
, 1)) == MULT
)
9527 rtx mul_op0
, mul_op1
, sub_op
;
9530 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9532 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9534 mul_op0
= XEXP (XEXP (x
, 0), 0);
9535 mul_op1
= XEXP (XEXP (x
, 0), 1);
9536 sub_op
= XEXP (x
, 1);
9540 mul_op0
= XEXP (XEXP (x
, 1), 0);
9541 mul_op1
= XEXP (XEXP (x
, 1), 1);
9542 sub_op
= XEXP (x
, 0);
9545 /* The first operand of the multiply may be optionally
9547 if (GET_CODE (mul_op0
) == NEG
)
9548 mul_op0
= XEXP (mul_op0
, 0);
9550 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9551 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9552 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9558 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9564 rtx shift_by_reg
= NULL
;
9568 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9569 if (shift_op
== NULL
)
9571 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9572 non_shift_op
= XEXP (x
, 0);
9575 non_shift_op
= XEXP (x
, 1);
9577 if (shift_op
!= NULL
)
9579 if (shift_by_reg
!= NULL
)
9582 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9583 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9586 *cost
+= extra_cost
->alu
.arith_shift
;
9588 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9589 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9594 && GET_CODE (XEXP (x
, 1)) == MULT
)
9598 *cost
+= extra_cost
->mult
[0].add
;
9599 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9600 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9601 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9605 if (CONST_INT_P (XEXP (x
, 0)))
9607 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9608 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9610 *cost
= COSTS_N_INSNS (insns
);
9612 *cost
+= insns
* extra_cost
->alu
.arith
;
9613 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9617 *cost
+= extra_cost
->alu
.arith
;
9622 if (GET_MODE_CLASS (mode
) == MODE_INT
9623 && GET_MODE_SIZE (mode
) < 4)
9625 rtx shift_op
, shift_reg
;
9628 /* We check both sides of the MINUS for shifter operands since,
9629 unlike PLUS, it's not commutative. */
9631 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9632 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9634 /* Slightly disparage, as we might need to widen the result. */
9637 *cost
+= extra_cost
->alu
.arith
;
9639 if (CONST_INT_P (XEXP (x
, 0)))
9641 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9650 *cost
+= COSTS_N_INSNS (1);
9652 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9654 rtx op1
= XEXP (x
, 1);
9657 *cost
+= 2 * extra_cost
->alu
.arith
;
9659 if (GET_CODE (op1
) == ZERO_EXTEND
)
9660 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9663 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9664 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9668 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9671 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9672 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9674 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9677 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9678 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9681 *cost
+= (extra_cost
->alu
.arith
9682 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9683 ? extra_cost
->alu
.arith
9684 : extra_cost
->alu
.arith_shift
));
9685 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9686 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9687 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9692 *cost
+= 2 * extra_cost
->alu
.arith
;
9698 *cost
= LIBCALL_COST (2);
9702 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9703 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9705 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9707 rtx mul_op0
, mul_op1
, add_op
;
9710 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9712 mul_op0
= XEXP (XEXP (x
, 0), 0);
9713 mul_op1
= XEXP (XEXP (x
, 0), 1);
9714 add_op
= XEXP (x
, 1);
9716 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9717 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9718 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9724 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9727 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9729 *cost
= LIBCALL_COST (2);
9733 /* Narrow modes can be synthesized in SImode, but the range
9734 of useful sub-operations is limited. Check for shift operations
9735 on one of the operands. Only left shifts can be used in the
9737 if (GET_MODE_CLASS (mode
) == MODE_INT
9738 && GET_MODE_SIZE (mode
) < 4)
9740 rtx shift_op
, shift_reg
;
9743 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9745 if (CONST_INT_P (XEXP (x
, 1)))
9747 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9748 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9750 *cost
= COSTS_N_INSNS (insns
);
9752 *cost
+= insns
* extra_cost
->alu
.arith
;
9753 /* Slightly penalize a narrow operation as the result may
9755 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9759 /* Slightly penalize a narrow operation as the result may
9763 *cost
+= extra_cost
->alu
.arith
;
9770 rtx shift_op
, shift_reg
;
9773 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9774 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9776 /* UXTA[BH] or SXTA[BH]. */
9778 *cost
+= extra_cost
->alu
.extend_arith
;
9779 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9781 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9786 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9787 if (shift_op
!= NULL
)
9792 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9793 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9796 *cost
+= extra_cost
->alu
.arith_shift
;
9798 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9799 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9802 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9804 rtx mul_op
= XEXP (x
, 0);
9806 if (TARGET_DSP_MULTIPLY
9807 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9808 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9809 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9810 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9811 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9812 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9814 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9815 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9816 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9817 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9818 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9823 *cost
+= extra_cost
->mult
[0].extend_add
;
9824 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9825 SIGN_EXTEND
, 0, speed_p
)
9826 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9827 SIGN_EXTEND
, 0, speed_p
)
9828 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9833 *cost
+= extra_cost
->mult
[0].add
;
9834 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9835 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9836 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9839 if (CONST_INT_P (XEXP (x
, 1)))
9841 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9842 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9844 *cost
= COSTS_N_INSNS (insns
);
9846 *cost
+= insns
* extra_cost
->alu
.arith
;
9847 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9851 *cost
+= extra_cost
->alu
.arith
;
9859 && GET_CODE (XEXP (x
, 0)) == MULT
9860 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9861 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9862 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9863 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9866 *cost
+= extra_cost
->mult
[1].extend_add
;
9867 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
9868 ZERO_EXTEND
, 0, speed_p
)
9869 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
9870 ZERO_EXTEND
, 0, speed_p
)
9871 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9875 *cost
+= COSTS_N_INSNS (1);
9877 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9878 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9881 *cost
+= (extra_cost
->alu
.arith
9882 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9883 ? extra_cost
->alu
.arith
9884 : extra_cost
->alu
.arith_shift
));
9886 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9888 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9893 *cost
+= 2 * extra_cost
->alu
.arith
;
9898 *cost
= LIBCALL_COST (2);
9901 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9904 *cost
+= extra_cost
->alu
.rev
;
9912 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9913 rtx op0
= XEXP (x
, 0);
9914 rtx shift_op
, shift_reg
;
9918 || (code
== IOR
&& TARGET_THUMB2
)))
9919 op0
= XEXP (op0
, 0);
9922 shift_op
= shifter_op_p (op0
, &shift_reg
);
9923 if (shift_op
!= NULL
)
9928 *cost
+= extra_cost
->alu
.log_shift_reg
;
9929 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9932 *cost
+= extra_cost
->alu
.log_shift
;
9934 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9935 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9939 if (CONST_INT_P (XEXP (x
, 1)))
9941 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9942 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9945 *cost
= COSTS_N_INSNS (insns
);
9947 *cost
+= insns
* extra_cost
->alu
.logical
;
9948 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
9953 *cost
+= extra_cost
->alu
.logical
;
9954 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
9955 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9961 rtx op0
= XEXP (x
, 0);
9962 enum rtx_code subcode
= GET_CODE (op0
);
9964 *cost
+= COSTS_N_INSNS (1);
9968 || (code
== IOR
&& TARGET_THUMB2
)))
9969 op0
= XEXP (op0
, 0);
9971 if (GET_CODE (op0
) == ZERO_EXTEND
)
9974 *cost
+= 2 * extra_cost
->alu
.logical
;
9976 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
9978 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9981 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9984 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9986 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
9988 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9993 *cost
+= 2 * extra_cost
->alu
.logical
;
9999 *cost
= LIBCALL_COST (2);
10003 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10004 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10006 rtx op0
= XEXP (x
, 0);
10008 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10009 op0
= XEXP (op0
, 0);
10012 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10014 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10015 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10018 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10020 *cost
= LIBCALL_COST (2);
10024 if (mode
== SImode
)
10026 if (TARGET_DSP_MULTIPLY
10027 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10028 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10029 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10030 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10031 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10032 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10033 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10034 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10035 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10036 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10037 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10038 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10041 /* SMUL[TB][TB]. */
10043 *cost
+= extra_cost
->mult
[0].extend
;
10044 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10045 SIGN_EXTEND
, 0, speed_p
);
10046 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10047 SIGN_EXTEND
, 1, speed_p
);
10051 *cost
+= extra_cost
->mult
[0].simple
;
10055 if (mode
== DImode
)
10058 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10059 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10060 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10061 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10064 *cost
+= extra_cost
->mult
[1].extend
;
10065 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10066 ZERO_EXTEND
, 0, speed_p
)
10067 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10068 ZERO_EXTEND
, 0, speed_p
));
10072 *cost
= LIBCALL_COST (2);
10077 *cost
= LIBCALL_COST (2);
10081 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10082 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10084 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10087 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10092 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10096 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10098 *cost
= LIBCALL_COST (1);
10102 if (mode
== SImode
)
10104 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10106 *cost
+= COSTS_N_INSNS (1);
10107 /* Assume the non-flag-changing variant. */
10109 *cost
+= (extra_cost
->alu
.log_shift
10110 + extra_cost
->alu
.arith_shift
);
10111 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10115 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10116 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10118 *cost
+= COSTS_N_INSNS (1);
10119 /* No extra cost for MOV imm and MVN imm. */
10120 /* If the comparison op is using the flags, there's no further
10121 cost, otherwise we need to add the cost of the comparison. */
10122 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10123 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10124 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10126 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10127 *cost
+= (COSTS_N_INSNS (1)
10128 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10130 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10133 *cost
+= extra_cost
->alu
.arith
;
10139 *cost
+= extra_cost
->alu
.arith
;
10143 if (GET_MODE_CLASS (mode
) == MODE_INT
10144 && GET_MODE_SIZE (mode
) < 4)
10146 /* Slightly disparage, as we might need an extend operation. */
10149 *cost
+= extra_cost
->alu
.arith
;
10153 if (mode
== DImode
)
10155 *cost
+= COSTS_N_INSNS (1);
10157 *cost
+= 2 * extra_cost
->alu
.arith
;
10162 *cost
= LIBCALL_COST (1);
10166 if (mode
== SImode
)
10169 rtx shift_reg
= NULL
;
10171 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10175 if (shift_reg
!= NULL
)
10178 *cost
+= extra_cost
->alu
.log_shift_reg
;
10179 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10182 *cost
+= extra_cost
->alu
.log_shift
;
10183 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10188 *cost
+= extra_cost
->alu
.logical
;
10191 if (mode
== DImode
)
10193 *cost
+= COSTS_N_INSNS (1);
10199 *cost
+= LIBCALL_COST (1);
10204 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10206 *cost
+= COSTS_N_INSNS (3);
10209 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10210 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10212 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10213 /* Assume that if one arm of the if_then_else is a register,
10214 that it will be tied with the result and eliminate the
10215 conditional insn. */
10216 if (REG_P (XEXP (x
, 1)))
10218 else if (REG_P (XEXP (x
, 2)))
10224 if (extra_cost
->alu
.non_exec_costs_exec
)
10225 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10227 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10230 *cost
+= op1cost
+ op2cost
;
10236 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10240 machine_mode op0mode
;
10241 /* We'll mostly assume that the cost of a compare is the cost of the
10242 LHS. However, there are some notable exceptions. */
10244 /* Floating point compares are never done as side-effects. */
10245 op0mode
= GET_MODE (XEXP (x
, 0));
10246 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10247 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10250 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10252 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10254 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10260 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10262 *cost
= LIBCALL_COST (2);
10266 /* DImode compares normally take two insns. */
10267 if (op0mode
== DImode
)
10269 *cost
+= COSTS_N_INSNS (1);
10271 *cost
+= 2 * extra_cost
->alu
.arith
;
10275 if (op0mode
== SImode
)
10280 if (XEXP (x
, 1) == const0_rtx
10281 && !(REG_P (XEXP (x
, 0))
10282 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10283 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10285 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10287 /* Multiply operations that set the flags are often
10288 significantly more expensive. */
10290 && GET_CODE (XEXP (x
, 0)) == MULT
10291 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10292 *cost
+= extra_cost
->mult
[0].flag_setting
;
10295 && GET_CODE (XEXP (x
, 0)) == PLUS
10296 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10297 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10299 *cost
+= extra_cost
->mult
[0].flag_setting
;
10304 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10305 if (shift_op
!= NULL
)
10307 if (shift_reg
!= NULL
)
10309 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10312 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10315 *cost
+= extra_cost
->alu
.arith_shift
;
10316 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10317 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10322 *cost
+= extra_cost
->alu
.arith
;
10323 if (CONST_INT_P (XEXP (x
, 1))
10324 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10326 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10334 *cost
= LIBCALL_COST (2);
10357 if (outer_code
== SET
)
10359 /* Is it a store-flag operation? */
10360 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10361 && XEXP (x
, 1) == const0_rtx
)
10363 /* Thumb also needs an IT insn. */
10364 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10367 if (XEXP (x
, 1) == const0_rtx
)
10372 /* LSR Rd, Rn, #31. */
10374 *cost
+= extra_cost
->alu
.shift
;
10384 *cost
+= COSTS_N_INSNS (1);
10388 /* RSBS T1, Rn, Rn, LSR #31
10390 *cost
+= COSTS_N_INSNS (1);
10392 *cost
+= extra_cost
->alu
.arith_shift
;
10396 /* RSB Rd, Rn, Rn, ASR #1
10397 LSR Rd, Rd, #31. */
10398 *cost
+= COSTS_N_INSNS (1);
10400 *cost
+= (extra_cost
->alu
.arith_shift
10401 + extra_cost
->alu
.shift
);
10407 *cost
+= COSTS_N_INSNS (1);
10409 *cost
+= extra_cost
->alu
.shift
;
10413 /* Remaining cases are either meaningless or would take
10414 three insns anyway. */
10415 *cost
= COSTS_N_INSNS (3);
10418 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10423 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10424 if (CONST_INT_P (XEXP (x
, 1))
10425 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10427 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10434 /* Not directly inside a set. If it involves the condition code
10435 register it must be the condition for a branch, cond_exec or
10436 I_T_E operation. Since the comparison is performed elsewhere
10437 this is just the control part which has no additional
10439 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10440 && XEXP (x
, 1) == const0_rtx
)
10448 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10449 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10452 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10456 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10458 *cost
= LIBCALL_COST (1);
10462 if (mode
== SImode
)
10465 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10469 *cost
= LIBCALL_COST (1);
10473 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10474 && MEM_P (XEXP (x
, 0)))
10476 if (mode
== DImode
)
10477 *cost
+= COSTS_N_INSNS (1);
10482 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10483 *cost
+= extra_cost
->ldst
.load
;
10485 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10487 if (mode
== DImode
)
10488 *cost
+= extra_cost
->alu
.shift
;
10493 /* Widening from less than 32-bits requires an extend operation. */
10494 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10496 /* We have SXTB/SXTH. */
10497 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10499 *cost
+= extra_cost
->alu
.extend
;
10501 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10503 /* Needs two shifts. */
10504 *cost
+= COSTS_N_INSNS (1);
10505 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10507 *cost
+= 2 * extra_cost
->alu
.shift
;
10510 /* Widening beyond 32-bits requires one more insn. */
10511 if (mode
== DImode
)
10513 *cost
+= COSTS_N_INSNS (1);
10515 *cost
+= extra_cost
->alu
.shift
;
10522 || GET_MODE (XEXP (x
, 0)) == SImode
10523 || GET_MODE (XEXP (x
, 0)) == QImode
)
10524 && MEM_P (XEXP (x
, 0)))
10526 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10528 if (mode
== DImode
)
10529 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10534 /* Widening from less than 32-bits requires an extend operation. */
10535 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10537 /* UXTB can be a shorter instruction in Thumb2, but it might
10538 be slower than the AND Rd, Rn, #255 alternative. When
10539 optimizing for speed it should never be slower to use
10540 AND, and we don't really model 16-bit vs 32-bit insns
10543 *cost
+= extra_cost
->alu
.logical
;
10545 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10547 /* We have UXTB/UXTH. */
10548 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10550 *cost
+= extra_cost
->alu
.extend
;
10552 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10554 /* Needs two shifts. It's marginally preferable to use
10555 shifts rather than two BIC instructions as the second
10556 shift may merge with a subsequent insn as a shifter
10558 *cost
= COSTS_N_INSNS (2);
10559 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10561 *cost
+= 2 * extra_cost
->alu
.shift
;
10564 /* Widening beyond 32-bits requires one more insn. */
10565 if (mode
== DImode
)
10567 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10574 /* CONST_INT has no mode, so we cannot tell for sure how many
10575 insns are really going to be needed. The best we can do is
10576 look at the value passed. If it fits in SImode, then assume
10577 that's the mode it will be used for. Otherwise assume it
10578 will be used in DImode. */
10579 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10584 /* Avoid blowing up in arm_gen_constant (). */
10585 if (!(outer_code
== PLUS
10586 || outer_code
== AND
10587 || outer_code
== IOR
10588 || outer_code
== XOR
10589 || outer_code
== MINUS
))
10593 if (mode
== SImode
)
10595 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10596 INTVAL (x
), NULL
, NULL
,
10602 *cost
+= COSTS_N_INSNS (arm_gen_constant
10603 (outer_code
, SImode
, NULL
,
10604 trunc_int_for_mode (INTVAL (x
), SImode
),
10606 + arm_gen_constant (outer_code
, SImode
, NULL
,
10607 INTVAL (x
) >> 32, NULL
,
10619 if (arm_arch_thumb2
&& !flag_pic
)
10620 *cost
+= COSTS_N_INSNS (1);
10622 *cost
+= extra_cost
->ldst
.load
;
10625 *cost
+= COSTS_N_INSNS (1);
10629 *cost
+= COSTS_N_INSNS (1);
10631 *cost
+= extra_cost
->alu
.arith
;
10637 *cost
= COSTS_N_INSNS (4);
10642 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10643 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10645 if (vfp3_const_double_rtx (x
))
10648 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10654 if (mode
== DFmode
)
10655 *cost
+= extra_cost
->ldst
.loadd
;
10657 *cost
+= extra_cost
->ldst
.loadf
;
10660 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10664 *cost
= COSTS_N_INSNS (4);
10670 && TARGET_HARD_FLOAT
10671 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10672 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10673 *cost
= COSTS_N_INSNS (1);
10675 *cost
= COSTS_N_INSNS (4);
10680 /* When optimizing for size, we prefer constant pool entries to
10681 MOVW/MOVT pairs, so bump the cost of these slightly. */
10688 *cost
+= extra_cost
->alu
.clz
;
10692 if (XEXP (x
, 1) == const0_rtx
)
10695 *cost
+= extra_cost
->alu
.log_shift
;
10696 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10699 /* Fall through. */
10703 *cost
+= COSTS_N_INSNS (1);
10707 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10708 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10709 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10710 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10711 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10712 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10713 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10714 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10718 *cost
+= extra_cost
->mult
[1].extend
;
10719 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10720 ZERO_EXTEND
, 0, speed_p
)
10721 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10722 ZERO_EXTEND
, 0, speed_p
));
10725 *cost
= LIBCALL_COST (1);
10728 case UNSPEC_VOLATILE
:
10730 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10733 /* Reading the PC is like reading any other register. Writing it
10734 is more expensive, but we take that into account elsewhere. */
10739 /* TODO: Simple zero_extract of bottom bits using AND. */
10740 /* Fall through. */
10744 && CONST_INT_P (XEXP (x
, 1))
10745 && CONST_INT_P (XEXP (x
, 2)))
10748 *cost
+= extra_cost
->alu
.bfx
;
10749 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10752 /* Without UBFX/SBFX, need to resort to shift operations. */
10753 *cost
+= COSTS_N_INSNS (1);
10755 *cost
+= 2 * extra_cost
->alu
.shift
;
10756 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10760 if (TARGET_HARD_FLOAT
)
10763 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10765 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10767 /* Pre v8, widening HF->DF is a two-step process, first
10768 widening to SFmode. */
10769 *cost
+= COSTS_N_INSNS (1);
10771 *cost
+= extra_cost
->fp
[0].widen
;
10773 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10777 *cost
= LIBCALL_COST (1);
10780 case FLOAT_TRUNCATE
:
10781 if (TARGET_HARD_FLOAT
)
10784 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10785 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10787 /* Vector modes? */
10789 *cost
= LIBCALL_COST (1);
10793 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10795 rtx op0
= XEXP (x
, 0);
10796 rtx op1
= XEXP (x
, 1);
10797 rtx op2
= XEXP (x
, 2);
10800 /* vfms or vfnma. */
10801 if (GET_CODE (op0
) == NEG
)
10802 op0
= XEXP (op0
, 0);
10804 /* vfnms or vfnma. */
10805 if (GET_CODE (op2
) == NEG
)
10806 op2
= XEXP (op2
, 0);
10808 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
10809 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
10810 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
10813 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10818 *cost
= LIBCALL_COST (3);
10823 if (TARGET_HARD_FLOAT
)
10825 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10826 a vcvt fixed-point conversion. */
10827 if (code
== FIX
&& mode
== SImode
10828 && GET_CODE (XEXP (x
, 0)) == FIX
10829 && GET_MODE (XEXP (x
, 0)) == SFmode
10830 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10831 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10835 *cost
+= extra_cost
->fp
[0].toint
;
10837 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10842 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10844 mode
= GET_MODE (XEXP (x
, 0));
10846 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
10847 /* Strip off the 'cost' of rounding towards zero. */
10848 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10849 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
10852 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10853 /* ??? Increase the cost to deal with transferring from
10854 FP -> CORE registers? */
10857 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10861 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10864 /* Vector costs? */
10866 *cost
= LIBCALL_COST (1);
10870 case UNSIGNED_FLOAT
:
10871 if (TARGET_HARD_FLOAT
)
10873 /* ??? Increase the cost to deal with transferring from CORE
10874 -> FP registers? */
10876 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10879 *cost
= LIBCALL_COST (1);
10887 /* Just a guess. Guess number of instructions in the asm
10888 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10889 though (see PR60663). */
10890 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10891 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10893 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10897 if (mode
!= VOIDmode
)
10898 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10900 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10905 #undef HANDLE_NARROW_SHIFT_ARITH
10907 /* RTX costs entry point. */
10910 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10911 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10914 int code = GET_CODE (x);
10915 gcc_assert (current_tune->insn_extra_cost);
10917 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10918 (enum rtx_code) outer_code,
10919 current_tune->insn_extra_cost,
10922 if (dump_file && (dump_flags & TDF_DETAILS))
10924 print_rtl_single (dump_file, x);
10925 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10926 *total, result ? "final" : "partial");
10931 /* All address computations that can be done are free, but rtx cost returns
10932 the same for practically all of them. So we weight the different types
10933 of address here in the order (most pref first):
10934 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10936 arm_arm_address_cost (rtx x)
10938 enum rtx_code c = GET_CODE (x);
10940 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10942 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10947 if (CONST_INT_P (XEXP (x, 1)))
10950 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10960 arm_thumb_address_cost (rtx x)
10962 enum rtx_code c = GET_CODE (x);
10967 && REG_P (XEXP (x, 0))
10968 && CONST_INT_P (XEXP (x, 1)))
10975 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10976 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10978 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10981 /* Adjust cost hook for XScale. */
10983 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10986 /* Some true dependencies can have a higher cost depending
10987 on precisely how certain input operands are used. */
10989 && recog_memoized (insn
) >= 0
10990 && recog_memoized (dep
) >= 0)
10992 int shift_opnum
= get_attr_shift (insn
);
10993 enum attr_type attr_type
= get_attr_type (dep
);
10995 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10996 operand for INSN. If we have a shifted input operand and the
10997 instruction we depend on is another ALU instruction, then we may
10998 have to account for an additional stall. */
10999 if (shift_opnum
!= 0
11000 && (attr_type
== TYPE_ALU_SHIFT_IMM
11001 || attr_type
== TYPE_ALUS_SHIFT_IMM
11002 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11003 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11004 || attr_type
== TYPE_ALU_SHIFT_REG
11005 || attr_type
== TYPE_ALUS_SHIFT_REG
11006 || attr_type
== TYPE_LOGIC_SHIFT_REG
11007 || attr_type
== TYPE_LOGICS_SHIFT_REG
11008 || attr_type
== TYPE_MOV_SHIFT
11009 || attr_type
== TYPE_MVN_SHIFT
11010 || attr_type
== TYPE_MOV_SHIFT_REG
11011 || attr_type
== TYPE_MVN_SHIFT_REG
))
11013 rtx shifted_operand
;
11016 /* Get the shifted operand. */
11017 extract_insn (insn
);
11018 shifted_operand
= recog_data
.operand
[shift_opnum
];
11020 /* Iterate over all the operands in DEP. If we write an operand
11021 that overlaps with SHIFTED_OPERAND, then we have to increase the
11022 cost of this dependency. */
11023 extract_insn (dep
);
11024 preprocess_constraints (dep
);
11025 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11027 /* We can ignore strict inputs. */
11028 if (recog_data
.operand_type
[opno
] == OP_IN
)
11031 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11043 /* Adjust cost hook for Cortex A9. */
11045 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11055 case REG_DEP_OUTPUT
:
11056 if (recog_memoized (insn
) >= 0
11057 && recog_memoized (dep
) >= 0)
11059 if (GET_CODE (PATTERN (insn
)) == SET
)
11062 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11064 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11066 enum attr_type attr_type_insn
= get_attr_type (insn
);
11067 enum attr_type attr_type_dep
= get_attr_type (dep
);
11069 /* By default all dependencies of the form
11072 have an extra latency of 1 cycle because
11073 of the input and output dependency in this
11074 case. However this gets modeled as a true
11075 dependency and hence all these checks. */
11076 if (REG_P (SET_DEST (PATTERN (insn
)))
11077 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
11079 /* FMACS is a special case where the dependent
11080 instruction can be issued 3 cycles before
11081 the normal latency in case of an output
11083 if ((attr_type_insn
== TYPE_FMACS
11084 || attr_type_insn
== TYPE_FMACD
)
11085 && (attr_type_dep
== TYPE_FMACS
11086 || attr_type_dep
== TYPE_FMACD
))
11088 if (dep_type
== REG_DEP_OUTPUT
)
11089 *cost
= insn_default_latency (dep
) - 3;
11091 *cost
= insn_default_latency (dep
);
11096 if (dep_type
== REG_DEP_OUTPUT
)
11097 *cost
= insn_default_latency (dep
) + 1;
11099 *cost
= insn_default_latency (dep
);
11109 gcc_unreachable ();
11115 /* Adjust cost hook for FA726TE. */
11117 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11120 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11121 has a penalty of 3. */
11122 if (dep_type
== REG_DEP_TRUE
11123 && recog_memoized (insn
) >= 0
11124 && recog_memoized (dep
) >= 0
11125 && get_attr_conds (dep
) == CONDS_SET
)
11127 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11128 if (get_attr_conds (insn
) == CONDS_USE
11129 && get_attr_type (insn
) != TYPE_BRANCH
)
11135 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11136 || get_attr_conds (insn
) == CONDS_USE
)
11146 /* Implement TARGET_REGISTER_MOVE_COST.
11148 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11149 it is typically more expensive than a single memory access. We set
11150 the cost to less than two memory accesses so that floating
11151 point to integer conversion does not go through memory. */
11154 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11155 reg_class_t from
, reg_class_t to
)
11159 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11160 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11162 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11163 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11165 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11172 if (from
== HI_REGS
|| to
== HI_REGS
)
11179 /* Implement TARGET_MEMORY_MOVE_COST. */
11182 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11183 bool in ATTRIBUTE_UNUSED)
11189 if (GET_MODE_SIZE (mode) < 4)
11192 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
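/* For illustration: the cost above reduces to 2 * GET_MODE_SIZE, doubled
   again for any class other than LO_REGS.  A minimal standalone sketch of
   just that arithmetic, with the hypothetical mode_size_bytes/is_lo_regs
   parameters standing in for the GCC macros:

     static int
     memory_move_cost_model (int mode_size_bytes, int is_lo_regs)
     {
       // e.g. 4-byte SImode: 8 for LO_REGS, 16 otherwise;
       //      8-byte DImode: 16 for LO_REGS, 32 otherwise.
       return (2 * mode_size_bytes) * (is_lo_regs ? 1 : 2);
     }
*/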
11196 /* Vectorizer cost model implementation. */
11198 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11200 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11202 int misalign ATTRIBUTE_UNUSED
)
11206 switch (type_of_cost
)
11209 return current_tune
->vec_costs
->scalar_stmt_cost
;
11212 return current_tune
->vec_costs
->scalar_load_cost
;
11215 return current_tune
->vec_costs
->scalar_store_cost
;
11218 return current_tune
->vec_costs
->vec_stmt_cost
;
11221 return current_tune
->vec_costs
->vec_align_load_cost
;
11224 return current_tune
->vec_costs
->vec_store_cost
;
11226 case vec_to_scalar
:
11227 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11229 case scalar_to_vec
:
11230 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11232 case unaligned_load
:
11233 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11235 case unaligned_store
:
11236 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11238 case cond_branch_taken
:
11239 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11241 case cond_branch_not_taken
:
11242 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11245 case vec_promote_demote
:
11246 return current_tune
->vec_costs
->vec_stmt_cost
;
11248 case vec_construct
:
11249 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11250 return elements
/ 2 + 1;
11253 gcc_unreachable ();
11257 /* Implement targetm.vectorize.add_stmt_cost. */
11260 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11261 struct _stmt_vec_info
*stmt_info
, int misalign
,
11262 enum vect_cost_model_location where
)
11264 unsigned *cost
= (unsigned *) data
;
11265 unsigned retval
= 0;
11267 if (flag_vect_cost_model
)
11269 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11270 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11272 /* Statements in an inner loop relative to the loop being
11273 vectorized are weighted more heavily. The value here is
11274 arbitrary and could potentially be improved with analysis. */
11275 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11276 count
*= 50; /* FIXME. */
11278 retval
= (unsigned) (count
* stmt_cost
);
11279 cost
[where
] += retval
;
11285 /* Return true if and only if this insn can dual-issue only as older. */
11287 cortexa7_older_only (rtx_insn
*insn
)
11289 if (recog_memoized (insn
) < 0)
11292 switch (get_attr_type (insn
))
11294 case TYPE_ALU_DSP_REG
:
11295 case TYPE_ALU_SREG
:
11296 case TYPE_ALUS_SREG
:
11297 case TYPE_LOGIC_REG
:
11298 case TYPE_LOGICS_REG
:
11300 case TYPE_ADCS_REG
:
11305 case TYPE_SHIFT_IMM
:
11306 case TYPE_SHIFT_REG
:
11307 case TYPE_LOAD_BYTE
:
11310 case TYPE_FFARITHS
:
11312 case TYPE_FFARITHD
:
11330 case TYPE_F_STORES
:
11337 /* Return true if and only if this insn can dual-issue as younger. */
11339 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11341 if (recog_memoized (insn
) < 0)
11344 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11348 switch (get_attr_type (insn
))
11351 case TYPE_ALUS_IMM
:
11352 case TYPE_LOGIC_IMM
:
11353 case TYPE_LOGICS_IMM
:
11358 case TYPE_MOV_SHIFT
:
11359 case TYPE_MOV_SHIFT_REG
:
11369 /* Look for an instruction that can dual issue only as an older
11370 instruction, and move it in front of any instructions that can
11371 dual-issue as younger, while preserving the relative order of all
11372 other instructions in the ready list. This is a heuristic to help
11373 dual-issue in later cycles, by postponing issue of more flexible
11374 instructions. This heuristic may affect dual issue opportunities
11375 in the current cycle. */
11377 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11378 int *n_readyp
, int clock
)
11381 int first_older_only
= -1, first_younger
= -1;
11385 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11389 /* Traverse the ready list from the head (the instruction to issue
11390 first), and looking for the first instruction that can issue as
11391 younger and the first instruction that can dual-issue only as
11393 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11395 rtx_insn
*insn
= ready
[i
];
11396 if (cortexa7_older_only (insn
))
11398 first_older_only
= i
;
11400 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11403 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11407 /* Nothing to reorder because either no younger insn found or insn
11408 that can dual-issue only as older appears before any insn that
11409 can dual-issue as younger. */
11410 if (first_younger
== -1)
11413 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11417 /* Nothing to reorder because no older-only insn in the ready list. */
11418 if (first_older_only
== -1)
11421 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11425 /* Move first_older_only insn before first_younger. */
11427 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11428 INSN_UID(ready
[first_older_only
]),
11429 INSN_UID(ready
[first_younger
]));
11430 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11431 for (i
= first_older_only
; i
< first_younger
; i
++)
11433 ready[i] = ready[i + 1];
11436 ready[i] = first_older_only_insn;
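/* For illustration: the reorder above is a stable rotate of a single
   element within the ready array.  A minimal standalone sketch on a plain
   int array, with the hypothetical predicates is_older_only/is_younger
   standing in for cortexa7_older_only and cortexa7_younger:

     static void
     rotate_older_only_first (int *ready, int n,
                              int (*is_older_only) (int),
                              int (*is_younger) (int))
     {
       int i, saved, first_older_only = -1, first_younger = -1;

       // Scan from the back of the array (the next insn to issue),
       // just as the loop above does.
       for (i = n - 1; i >= 0; i--)
         {
           if (is_older_only (ready[i]))
             {
               first_older_only = i;
               break;
             }
           else if (is_younger (ready[i]) && first_younger == -1)
             first_younger = i;
         }

       // Nothing to do if either kind of insn is missing.
       if (first_older_only == -1 || first_younger == -1)
         return;

       // Slide the intervening entries down and reinsert the older-only
       // element just before the first younger one.
       saved = ready[first_older_only];
       for (i = first_older_only; i < first_younger; i++)
         ready[i] = ready[i + 1];
       ready[i] = saved;
     }
*/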
11440 /* Implement TARGET_SCHED_REORDER. */
11442 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11447 case TARGET_CPU_cortexa7
:
11448 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11451 /* Do nothing for other cores. */
11455 return arm_issue_rate ();
11458 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11459 It corrects the value of COST based on the relationship between
11460 INSN and DEP through the dependence LINK. It returns the new
11461 value. There is a per-core adjust_cost hook to adjust scheduler costs
11462 and the per-core hook can choose to completely override the generic
11463 adjust_cost function. Only put bits of code into arm_adjust_cost that
11464 are common across all cores. */
11466 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
11471 /* When generating Thumb-1 code, we want to place flag-setting operations
11472 close to a conditional branch which depends on them, so that we can
11473 omit the comparison. */
11476 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11477 && recog_memoized (dep
) >= 0
11478 && get_attr_conds (dep
) == CONDS_SET
)
11481 if (current_tune
->sched_adjust_cost
!= NULL
)
11483 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
11487 /* XXX Is this strictly true? */
11488 if (dep_type
== REG_DEP_ANTI
11489 || dep_type
== REG_DEP_OUTPUT
)
11492 /* Call insns don't incur a stall, even if they follow a load. */
11497 if ((i_pat
= single_set (insn
)) != NULL
11498 && MEM_P (SET_SRC (i_pat
))
11499 && (d_pat
= single_set (dep
)) != NULL
11500 && MEM_P (SET_DEST (d_pat
)))
11502 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11503 /* This is a load after a store; there is no conflict if the load reads
11504 from a cached area. Assume that loads from the stack and from the
11505 constant pool are cached, and that others will miss. This is a
11508 if ((GET_CODE (src_mem
) == SYMBOL_REF
11509 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11510 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11511 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11512 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11520 arm_max_conditional_execute (void)
11522 return max_insns_skipped;
11526 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11529 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11531 return (optimize > 0) ? 2 : 0;
11535 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11537 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11540 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11541 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11542 sequences of non-executed instructions in IT blocks probably take the same
11543 amount of time as executed instructions (and the IT instruction itself takes
11544 space in icache). This function was experimentally determined to give good
11545 results on a popular embedded benchmark. */
11548 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11550 return (TARGET_32BIT && speed_p) ? 1
11551 : arm_default_branch_cost (speed_p, predictable_p);
11555 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11557 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11560 static bool fp_consts_inited = false;
11562 static REAL_VALUE_TYPE value_fp0;
11565 init_fp_table (void)
11569 r = REAL_VALUE_ATOF ("0", DFmode);
11571 fp_consts_inited = true;
11574 /* Return TRUE if rtx X is a valid immediate FP constant. */
11576 arm_const_double_rtx (rtx x)
11578 const REAL_VALUE_TYPE *r;
11580 if (!fp_consts_inited)
11583 r = CONST_DOUBLE_REAL_VALUE (x);
11584 if (REAL_VALUE_MINUS_ZERO (*r))
11587 if (real_equal (r, &value_fp0))
11593 /* VFPv3 has a fairly wide range of representable immediates, formed from
11594 "quarter-precision" floating-point values. These can be evaluated using this
11595 formula (with ^ for exponentiation):
11599 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11600 16 <= n <= 31 and 0 <= r <= 7.
11602 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11604 - A (most-significant) is the sign bit.
11605 - BCD are the exponent (encoded as r XOR 3).
11606 - EFGH are the mantissa (encoded as n - 16).
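/* For illustration: a minimal standalone sketch that decodes such an 8-bit
   ABCDEFGH immediate back to a double, assuming the usual quarter-precision
   value (-1)^s * n * 2^-r implied by the field ranges above (the helper
   name is hypothetical):

     #include <stdint.h>
     #include <math.h>

     static double
     vfp3_decode_quarter_precision (uint8_t imm)
     {
       int sign = (imm >> 7) & 1;       // A: sign bit
       int r = ((imm >> 4) & 7) ^ 3;    // BCD: exponent, encoded as r XOR 3
       int n = (imm & 0xf) + 16;        // EFGH: mantissa, encoded as n - 16
       double value = ldexp ((double) n, -r);
       return sign ? -value : value;
     }

   For example, imm == 0x70 gives sign 0, r == 4, n == 16, i.e. 1.0.  */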
11609 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11610 fconst[sd] instruction, or -1 if X isn't suitable. */
11612 vfp3_const_double_index (rtx x
)
11614 REAL_VALUE_TYPE r
, m
;
11615 int sign
, exponent
;
11616 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11617 unsigned HOST_WIDE_INT mask
;
11618 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11621 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11624 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11626 /* We can't represent these things, so detect them first. */
11627 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11630 /* Extract sign, exponent and mantissa. */
11631 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11632 r
= real_value_abs (&r
);
11633 exponent
= REAL_EXP (&r
);
11634 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11635 highest (sign) bit, with a fixed binary point at bit point_pos.
11636 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11637 bits for the mantissa, this may fail (low bits would be lost). */
11638 real_ldexp (&m
, &r
, point_pos
- exponent
);
11639 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11640 mantissa
= w
.elt (0);
11641 mant_hi
= w
.elt (1);
11643 /* If there are bits set in the low part of the mantissa, we can't
11644 represent this value. */
11648 /* Now make it so that mantissa contains the most-significant bits, and move
11649 the point_pos to indicate that the least-significant bits have been
11651 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11652 mantissa
= mant_hi
;
11654 /* We can permit four significant bits of mantissa only, plus a high bit
11655 which is always 1. */
11656 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11657 if ((mantissa
& mask
) != 0)
11660 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11661 mantissa
>>= point_pos
- 5;
11663 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11664 floating-point immediate zero with Neon using an integer-zero load, but
11665 that case is handled elsewhere.) */
11669 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11671 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11672 normalized significands are in the range [1, 2). (Our mantissa is shifted
11673 left 4 places at this point relative to normalized IEEE754 values). GCC
11674 internally uses [0.5, 1) (see real.c), so the exponent returned from
11675 REAL_EXP must be altered. */
11676 exponent
= 5 - exponent
;
11678 if (exponent
< 0 || exponent
> 7)
11681 /* Sign, mantissa and exponent are now in the correct form to plug into the
11682 formula described in the comment above. */
11683 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11686 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11688 vfp3_const_double_rtx (rtx x
)
11693 return vfp3_const_double_index (x
) != -1;
11696 /* Recognize immediates which can be used in various Neon instructions. Legal
11697 immediates are described by the following table (for VMVN variants, the
11698 bitwise inverse of the constant shown is recognized. In either case, VMOV
11699 is output and the correct instruction to use for a given constant is chosen
11700 by the assembler). The constant shown is replicated across all elements of
11701 the destination vector.
11703 insn elems variant constant (binary)
11704 ---- ----- ------- -----------------
11705 vmov i32 0 00000000 00000000 00000000 abcdefgh
11706 vmov i32 1 00000000 00000000 abcdefgh 00000000
11707 vmov i32 2 00000000 abcdefgh 00000000 00000000
11708 vmov i32 3 abcdefgh 00000000 00000000 00000000
11709 vmov i16 4 00000000 abcdefgh
11710 vmov i16 5 abcdefgh 00000000
11711 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11712 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11713 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11714 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11715 vmvn i16 10 00000000 abcdefgh
11716 vmvn i16 11 abcdefgh 00000000
11717 vmov i32 12 00000000 00000000 abcdefgh 11111111
11718 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11719 vmov i32 14 00000000 abcdefgh 11111111 11111111
11720 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11721 vmov i8 16 abcdefgh
11722 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11723 eeeeeeee ffffffff gggggggg hhhhhhhh
11724 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11725 vmov f32 19 00000000 00000000 00000000 00000000
11727 For case 18, B = !b. Representable values are exactly those accepted by
11728 vfp3_const_double_index, but are output as floating-point numbers rather
11731 For case 19, we will change it to vmov.i32 when assembling.
11733 Variants 0-5 (inclusive) may also be used as immediates for the second
11734 operand of VORR/VBIC instructions.
11736 The INVERSE argument causes the bitwise inverse of the given operand to be
11737 recognized instead (used for recognizing legal immediates for the VAND/VORN
11738 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11739 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11740 output, rather than the real insns vbic/vorr).
11742 INVERSE makes no difference to the recognition of float vectors.
11744 The return value is the variant of immediate as shown in the above table, or
11745 -1 if the given value doesn't match any of the listed patterns.
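/* For illustration: the recognizer below splats the constant into a byte
   array and tests it against each row of the table above.  A minimal
   standalone sketch of just two of those tests (variants 0 and 16) over a
   little-endian splat whose size is a multiple of four bytes; the helper
   name is hypothetical:

     #include <stddef.h>

     // Returns 0 (vmov.i32 with only the low byte set), 16 (vmov.i8),
     // or -1 if neither pattern matches.
     static int
     classify_splat (const unsigned char *bytes, size_t nbytes)
     {
       size_t i;
       int variant0 = 1, variant16 = 1;

       // variant 0: in each 32-bit group only the lowest byte is set.
       for (i = 0; i < nbytes; i += 4)
         if (!(bytes[i] == bytes[0] && bytes[i + 1] == 0
               && bytes[i + 2] == 0 && bytes[i + 3] == 0))
           variant0 = 0;

       // variant 16: every byte is identical.
       for (i = 0; i < nbytes; i++)
         if (bytes[i] != bytes[0])
           variant16 = 0;

       return variant0 ? 0 : variant16 ? 16 : -1;
     }
*/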
11748 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11749 rtx
*modconst
, int *elementwidth
)
11751 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11753 for (i = 0; i < idx; i += (STRIDE)) \
11758 immtype = (CLASS); \
11759 elsize = (ELSIZE); \
11763 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11764 unsigned int innersize
;
11765 unsigned char bytes
[16];
11766 int immtype
= -1, matches
;
11767 unsigned int invmask
= inverse
? 0xff : 0;
11768 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11771 n_elts
= CONST_VECTOR_NUNITS (op
);
11775 if (mode
== VOIDmode
)
11779 innersize
= GET_MODE_UNIT_SIZE (mode
);
11781 /* Vectors of float constants. */
11782 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11784 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11786 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11789 /* FP16 vectors cannot be represented. */
11790 if (GET_MODE_INNER (mode
) == HFmode
)
11793 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11794 are distinct in this context. */
11795 if (!const_vec_duplicate_p (op
))
11799 *modconst
= CONST_VECTOR_ELT (op
, 0);
11804 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11810 /* The tricks done in the code below apply for little-endian vector layout.
11811 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11812 FIXME: Implement logic for big-endian vectors. */
11813 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
11816 /* Splat vector constant out into a byte vector. */
11817 for (i
= 0; i
< n_elts
; i
++)
11819 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11820 unsigned HOST_WIDE_INT elpart
;
11822 gcc_assert (CONST_INT_P (el
));
11823 elpart
= INTVAL (el
);
11825 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11827 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11828 elpart
>>= BITS_PER_UNIT
;
11832 /* Sanity check. */
11833 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11837 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11838 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11840 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11841 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11843 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11844 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11846 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11847 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11849 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11851 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11853 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11854 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11856 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11857 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11859 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11860 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11862 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11863 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11865 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11867 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11869 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11870 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11872 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11873 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11875 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11876 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11878 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11879 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11881 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11883 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11884 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11892 *elementwidth
= elsize
;
11896 unsigned HOST_WIDE_INT imm
= 0;
11898 /* Un-invert bytes of recognized vector, if necessary. */
11900 for (i
= 0; i
< idx
; i
++)
11901 bytes
[i
] ^= invmask
;
11905 /* FIXME: Broken on 32-bit H_W_I hosts. */
11906 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11908 for (i
= 0; i
< 8; i
++)
11909 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11910 << (i
* BITS_PER_UNIT
);
11912 *modconst
= GEN_INT (imm
);
11916 unsigned HOST_WIDE_INT imm
= 0;
11918 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11919 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11921 *modconst
= GEN_INT (imm
);
11929 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11930 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11931 float elements), and a modified constant (whatever should be output for a
11932 VMOV) in *MODCONST. */
11935 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
11936 rtx
*modconst
, int *elementwidth
)
11940 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
11946 *modconst
= tmpconst
;
11949 *elementwidth
= tmpwidth
;
11954 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11955 the immediate is valid, write a constant suitable for using as an operand
11956 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11957 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11960 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
11961 rtx
*modconst
, int *elementwidth
)
11965 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
11967 if (retval
< 0 || retval
> 5)
11971 *modconst
= tmpconst
;
11974 *elementwidth
= tmpwidth
;
11979 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11980 the immediate is valid, write a constant suitable for using as an operand
11981 to VSHR/VSHL to *MODCONST and the corresponding element width to
11982 *ELEMENTWIDTH. ISLEFTSHIFT determines whether this is a left or right shift,
11983 because the two have different limitations. */
11986 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
11987 rtx
*modconst
, int *elementwidth
,
11990 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11991 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
11992 unsigned HOST_WIDE_INT last_elt
= 0;
11993 unsigned HOST_WIDE_INT maxshift
;
11995 /* Split vector constant out into a byte vector. */
11996 for (i
= 0; i
< n_elts
; i
++)
11998 rtx el
= CONST_VECTOR_ELT (op
, i
);
11999 unsigned HOST_WIDE_INT elpart
;
12001 if (CONST_INT_P (el
))
12002 elpart
= INTVAL (el
);
12003 else if (CONST_DOUBLE_P (el
))
12006 gcc_unreachable ();
12008 if (i
!= 0 && elpart
!= last_elt
)
12014 /* Shift less than element size. */
12015 maxshift
= innersize
* 8;
12019 /* Left shift immediate value can be from 0 to <size>-1. */
12020 if (last_elt >= maxshift)
12025 /* Right shift immediate value can be from 1 to <size>. */
12026 if (last_elt == 0 || last_elt > maxshift)
12031 *elementwidth = innersize * 8;
12034 *modconst = CONST_VECTOR_ELT (op, 0);
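/* For illustration: with 32-bit elements (innersize == 4), maxshift is 32,
   so a left-shift immediate must lie in 0..31 and a right-shift immediate
   in 1..32; a left shift by 32 or a right shift by 0 is rejected above.  */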
12039 /* Return a string suitable for output of Neon immediate logic operation
12043 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12044 int inverse
, int quad
)
12046 int width
, is_valid
;
12047 static char templ
[40];
12049 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12051 gcc_assert (is_valid
!= 0);
12054 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12056 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12061 /* Return a string suitable for output of Neon immediate shift operation
12062 (VSHR or VSHL) MNEM. */
12065 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12066 machine_mode mode
, int quad
,
12069 int width
, is_valid
;
12070 static char templ
[40];
12072 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12073 gcc_assert (is_valid
!= 0);
12076 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12078 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12083 /* Output a sequence of pairwise operations to implement a reduction.
12084 NOTE: We do "too much work" here, because pairwise operations work on two
12085 registers-worth of operands in one go. Unfortunately I don't think we can exploit
12086 those extra calculations to do the full operation in fewer steps.
12087 Although all vector elements of the result but the first are ignored, we
12088 actually calculate the same result in each of the elements. An alternative
12089 such as initially loading a vector with zero to use as each of the second
12090 operands would use up an additional register and take an extra instruction,
12091 for no particular gain. */
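/* For illustration: a minimal standalone sketch of the same reduction on a
   plain array (the helper name is hypothetical).  Each step halves the
   number of live lanes; after log2(n) steps element 0 holds the full sum.
   The sketch simply leaves the upper elements untouched, whereas the real
   pairwise instructions fill them with redundant results, as noted above:

     static int
     pairwise_reduce_sum (int *v, int n)   // n must be a power of two
     {
       for (int half = n / 2; half >= 1; half /= 2)
         for (int i = 0; i < half; i++)
           v[i] = v[2 * i] + v[2 * i + 1];
       return v[0];
     }

   E.g. {1, 2, 3, 4} becomes {3, 7, 3, 4}, then {10, 7, 3, 4}, returning 10.  */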
12094 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12095 rtx (*reduc
) (rtx
, rtx
, rtx
))
12097 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12100 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12102 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12103 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12108 /* If VALS is a vector constant that can be loaded into a register
12109 using VDUP, generate instructions to do so and return an RTX to
12110 assign to the register. Otherwise return NULL_RTX. */
12113 neon_vdup_constant (rtx vals
)
12115 machine_mode mode
= GET_MODE (vals
);
12116 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12119 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12122 if (!const_vec_duplicate_p (vals
, &x
))
12123 /* The elements are not all the same. We could handle repeating
12124 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12125 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12129 /* We can load this constant by using VDUP and a constant in a
12130 single ARM register. This will be cheaper than a vector
12133 x
= copy_to_mode_reg (inner_mode
, x
);
12134 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12137 /* Generate code to load VALS, which is a PARALLEL containing only
12138 constants (for vec_init) or CONST_VECTOR, efficiently into a
12139 register. Returns an RTX to copy into the register, or NULL_RTX
12140 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12143 neon_make_constant (rtx vals
)
12145 machine_mode mode
= GET_MODE (vals
);
12147 rtx const_vec
= NULL_RTX
;
12148 int n_elts
= GET_MODE_NUNITS (mode
);
12152 if (GET_CODE (vals
) == CONST_VECTOR
)
12154 else if (GET_CODE (vals
) == PARALLEL
)
12156 /* A CONST_VECTOR must contain only CONST_INTs and
12157 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12158 Only store valid constants in a CONST_VECTOR. */
12159 for (i
= 0; i
< n_elts
; ++i
)
12161 rtx x
= XVECEXP (vals
, 0, i
);
12162 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12165 if (n_const
== n_elts
)
12166 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12169 gcc_unreachable ();
12171 if (const_vec
!= NULL
12172 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12173 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12175 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12176 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12177 pipeline cycle; creating the constant takes one or two ARM
12178 pipeline cycles. */
12180 else if (const_vec
!= NULL_RTX
)
12181 /* Load from constant pool. On Cortex-A8 this takes two cycles
12182 (for either double or quad vectors). We can not take advantage
12183 of single-cycle VLD1 because we need a PC-relative addressing
12187 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12188 We cannot construct an initializer. */
12192 /* Initialize vector TARGET to VALS. */
12195 neon_expand_vector_init (rtx target
, rtx vals
)
12197 machine_mode mode
= GET_MODE (target
);
12198 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12199 int n_elts
= GET_MODE_NUNITS (mode
);
12200 int n_var
= 0, one_var
= -1;
12201 bool all_same
= true;
12205 for (i
= 0; i
< n_elts
; ++i
)
12207 x
= XVECEXP (vals
, 0, i
);
12208 if (!CONSTANT_P (x
))
12209 ++n_var
, one_var
= i
;
12211 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12217 rtx constant
= neon_make_constant (vals
);
12218 if (constant
!= NULL_RTX
)
12220 emit_move_insn (target
, constant
);
12225 /* Splat a single non-constant element if we can. */
12226 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12228 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12229 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
12233 /* One field is non-constant. Load constant then overwrite varying
12234 field. This is more efficient than using the stack. */
12237 rtx copy
= copy_rtx (vals
);
12238 rtx index
= GEN_INT (one_var
);
12240 /* Load constant part of vector, substitute neighboring value for
12241 varying element. */
12242 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12243 neon_expand_vector_init (target
, copy
);
12245 /* Insert variable. */
12246 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12250 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12253 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12256 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12259 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12262 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12265 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12268 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12271 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12274 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12277 gcc_unreachable ();
12282 /* Construct the vector in memory one field at a time
12283 and load the whole vector. */
12284 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12285 for (i
= 0; i
< n_elts
; i
++)
12286 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12287 i
* GET_MODE_SIZE (inner_mode
)),
12288 XVECEXP (vals
, 0, i
));
12289 emit_move_insn (target
, mem
);
12292 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12293 ERR if it doesn't. EXP indicates the source location, which includes the
12294 inlining history for intrinsics. */
12297 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12298 const_tree exp
, const char *desc
)
12300 HOST_WIDE_INT lane
;
12302 gcc_assert (CONST_INT_P (operand
));
12304 lane
= INTVAL (operand
);
12306 if (lane
< low
|| lane
>= high
)
12309 error ("%K%s %wd out of range %wd - %wd",
12310 exp
, desc
, lane
, low
, high
- 1);
12312 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12316 /* Bounds-check lanes. */
12319 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12322 bounds_check (operand
, low
, high
, exp
, "lane");
12325 /* Bounds-check constants. */
12328 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12330 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12334 neon_element_bits (machine_mode mode
)
12336 return GET_MODE_UNIT_BITSIZE (mode
);
12340 /* Predicates for `match_operand' and `match_operator'. */
12342 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12343 WB is true if full writeback address modes are allowed and is false
12344 if limited writeback address modes (POST_INC and PRE_DEC) are
12348 arm_coproc_mem_operand (rtx op
, bool wb
)
12352 /* Reject eliminable registers. */
12353 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12354 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12355 || reg_mentioned_p (arg_pointer_rtx
, op
)
12356 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12357 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12358 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12359 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12362 /* Constants are converted into offsets from labels. */
12366 ind
= XEXP (op
, 0);
12368 if (reload_completed
12369 && (GET_CODE (ind
) == LABEL_REF
12370 || (GET_CODE (ind
) == CONST
12371 && GET_CODE (XEXP (ind
, 0)) == PLUS
12372 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12373 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12376 /* Match: (mem (reg)). */
12378 return arm_address_register_rtx_p (ind
, 0);
12380 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12381 acceptable in any case (subject to verification by
12382 arm_address_register_rtx_p). We need WB to be true to accept
12383 PRE_INC and POST_DEC. */
12384 if (GET_CODE (ind
) == POST_INC
12385 || GET_CODE (ind
) == PRE_DEC
12387 && (GET_CODE (ind
) == PRE_INC
12388 || GET_CODE (ind
) == POST_DEC
)))
12389 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12392 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12393 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12394 && GET_CODE (XEXP (ind
, 1)) == PLUS
12395 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12396 ind
= XEXP (ind
, 1);
12401 if (GET_CODE (ind
) == PLUS
12402 && REG_P (XEXP (ind
, 0))
12403 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12404 && CONST_INT_P (XEXP (ind
, 1))
12405 && INTVAL (XEXP (ind
, 1)) > -1024
12406 && INTVAL (XEXP (ind
, 1)) < 1024
12407 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12413 /* Return TRUE if OP is a memory operand which we can load or store a vector
12414 to/from. TYPE is one of the following values:
12415 0 - Vector load/store (vldr)
12416 1 - Core registers (ldm)
12417 2 - Element/structure loads (vld1)
12420 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12424 /* Reject eliminable registers. */
12425 if (strict
&& ! (reload_in_progress
|| reload_completed
)
12426 && (reg_mentioned_p (frame_pointer_rtx
, op
)
12427 || reg_mentioned_p (arg_pointer_rtx
, op
)
12428 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12429 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12430 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12431 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12434 /* Constants are converted into offsets from labels. */
12438 ind
= XEXP (op
, 0);
12440 if (reload_completed
12441 && (GET_CODE (ind
) == LABEL_REF
12442 || (GET_CODE (ind
) == CONST
12443 && GET_CODE (XEXP (ind
, 0)) == PLUS
12444 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12445 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12448 /* Match: (mem (reg)). */
12450 return arm_address_register_rtx_p (ind
, 0);
12452 /* Allow post-increment with Neon registers. */
12453 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12454 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12455 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12457 /* Allow post-increment by register for VLDn */
12458 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12459 && GET_CODE (XEXP (ind
, 1)) == PLUS
12460 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12467 && GET_CODE (ind
) == PLUS
12468 && REG_P (XEXP (ind
, 0))
12469 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12470 && CONST_INT_P (XEXP (ind
, 1))
12471 && INTVAL (XEXP (ind
, 1)) > -1024
12472 /* For quad modes, we restrict the constant offset to be slightly less
12473 than what the instruction format permits. We have no such constraint
12474 on double mode offsets. (This must match arm_legitimate_index_p.) */
12475 && (INTVAL (XEXP (ind
, 1))
12476 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12477 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
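/* For illustration: combined with the multiple-of-four test, the offset
   check above accepts -1020 .. +1020 for double-register modes and
   -1020 .. +1012 for quad-register modes, the latter matching the slightly
   tighter Q-register limit in arm_legitimate_index_p.  */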
12483 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12486 neon_struct_mem_operand (rtx op
)
12490 /* Reject eliminable registers. */
12491 if (! (reload_in_progress
|| reload_completed
)
12492 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12493 || reg_mentioned_p (arg_pointer_rtx
, op
)
12494 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12495 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12496 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12497 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12500 /* Constants are converted into offsets from labels. */
12504 ind
= XEXP (op
, 0);
12506 if (reload_completed
12507 && (GET_CODE (ind
) == LABEL_REF
12508 || (GET_CODE (ind
) == CONST
12509 && GET_CODE (XEXP (ind
, 0)) == PLUS
12510 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12511 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12514 /* Match: (mem (reg)). */
12516 return arm_address_register_rtx_p (ind
, 0);
12518 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12519 if (GET_CODE (ind
) == POST_INC
12520 || GET_CODE (ind
) == PRE_DEC
)
12521 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12526 /* Return true if X is a register that will be eliminated later on. */
12528 arm_eliminable_register (rtx x)
12530 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12531 || REGNO (x) == ARG_POINTER_REGNUM
12532 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12533 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12536 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12537 coprocessor registers. Otherwise return NO_REGS. */
12540 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12542 if (mode
== HFmode
)
12544 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12545 return GENERAL_REGS
;
12546 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12548 return GENERAL_REGS
;
12551 /* The neon move patterns handle all legitimate vector and struct
12554 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12555 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12556 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12557 || VALID_NEON_STRUCT_MODE (mode
)))
12560 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12563 return GENERAL_REGS
;
12566 /* Values which must be returned in the most-significant end of the return
12570 arm_return_in_msb (const_tree valtype)
12572 return (TARGET_AAPCS_BASED
12573 && BYTES_BIG_ENDIAN
12574 && (AGGREGATE_TYPE_P (valtype)
12575 || TREE_CODE (valtype) == COMPLEX_TYPE
12576 || FIXED_POINT_TYPE_P (valtype)));
/* Return TRUE if X references a SYMBOL_REF.  */
symbol_mentioned_p (rtx x)

  if (GET_CODE (x) == SYMBOL_REF)

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))

      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
/* Return TRUE if X references a LABEL_REF.  */
label_mentioned_p (rtx x)

  if (GET_CODE (x) == LABEL_REF)

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))

      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
tls_mentioned_p (rtx x)

  switch (GET_CODE (x))

      return tls_mentioned_p (XEXP (x, 0));

      if (XINT (x, 1) == UNSPEC_TLS)

      /* Fall through.  */
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */
arm_cannot_copy_insn_p (rtx_insn *insn)

  /* The tls call insn cannot be copied, as it is paired with a data ...  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)

      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))

  rtx set = single_set (insn);

      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
minmax_code (rtx x)

  enum rtx_code code = GET_CODE (x);

  gcc_unreachable ();
/* Match pair of min/max operators that can be implemented via usat/ssat.  */
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)

  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)

      *signed_sat = false;

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)

      *signed_sat = true;
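
  /* For example (illustrative, not exhaustive): bounds of 0 and 255 describe
     an unsigned saturation to 8 bits, as performed by "usat Rd, #8, Rm",
     while bounds of -256 and 255 describe the signed saturation performed
     by "ssat Rd, #9, Rm".  */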
/* Return 1 if memory locations are adjacent.  */
adjacent_mem_locations (rtx a, rtx b)

  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))

      HOST_WIDE_INT val0 = 0, val1 = 0;

      if (GET_CODE (XEXP (a, 0)) == PLUS)

	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));

	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)

	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));

	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))

      val_diff = val1 - val0;

      /* If the target has load delay slots, then there's no benefit
	 to using an ldm instruction unless the offset is zero and
	 we are optimizing for size.  */
      return (optimize_size && (REGNO (reg0) == REGNO (reg1))
	      && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
	      && (val_diff == 4 || val_diff == -4));

  return ((REGNO (reg0) == REGNO (reg1))
	  && (val_diff == 4 || val_diff == -4));
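
/* For example, SImode references at [r3, #4] and [r3, #8] are adjacent
   (same base register, offsets differing by 4), whereas [r3, #4] and
   [r4, #8], or [r3, #4] and [r3, #12], are not.  */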
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))

      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))

     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
	 REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
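
/* As a concrete illustration only (assuming SImode, so <reg_increment> is 4),
   a two-register load from the address held in r0 with a zero offset is

     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
		(set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))])

   which corresponds to an instruction such as "ldmia r0, {r4, r5}".  */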
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)

  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;

  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;

  bool addr_reg_in_reglist = false;
  bool update = false;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is loaded,
     success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)

      reg = SET_DEST (elt);
      mem = SET_SRC (elt);

      reg = SET_SRC (elt);
      mem = SET_DEST (elt);

  if (!REG_P (reg) || !MEM_P (mem))

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)

      if (!CONST_INT_P (XEXP (addr, 1)))

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))

  for (; i < count; i++)

      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)

	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);

	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);

	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno

	      (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.  It
	     guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))

	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;

  if (update && addr_reg_in_reglist)

  /* For Thumb-1, address register is always modified - either by write-back
     or by explicit load.  If the pattern does not describe an update,
     then the address register must be in the list of loaded registers.  */

  return update || addr_reg_in_reglist;
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)

  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	 ldr	rd1, [rbase + offset]
	 ldr	rd2, [rbase + offset + 4]

	 add	rd1, rbase, offset
	 ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */
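
/* For example (purely illustrative): with NOPS == 4 and
   UNSORTED_OFFSETS == {8, 0, 12, 4}, the caller seeds ORDER[0] = 1 (the
   index of offset 0) and this function fills in ORDER = {1, 3, 0, 2},
   visiting the offsets 0, 4, 8, 12 in ascending order.  */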
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)

  for (i = 1; i < nops; i++)

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)

	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])

      if (order[i] == order[i - 1])

      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps
   insn numbers to an ascending order of loads.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
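
/* For instance, with NOPS == 3 the operand array is laid out as
   OPERANDS[0..2] = destination registers and OPERANDS[3..5] = the
   corresponding memory references; the loop below relies on exactly
   this layout.  */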
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)

  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory ...  */
  for (i = 0; i < nops; i++)

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))

	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)

	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])

	/* Not a suitable memory address.  */

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))

    memcpy (saved_order, order, sizeof order);

  for (i = 0; i < nops; i++)
    regs[i] = unsorted_regs[check_regs ? order[i] : i];

  *load_offset = unsorted_offsets[order[0]];

      && !peep2_reg_dead_p (nops, base_reg_rtx))

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))

  if (!multiple_operation_profitable_p (false, nops,
					? unsorted_offsets[order[0]] : 0))
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)

  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];

  rtx base_reg_rtx = NULL;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory ...  */
  for (i = 0; i < nops; i++)

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))

      offset = const0_rtx;

      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (GET_CODE (reg) == SUBREG
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (GET_CODE (reg) == SUBREG
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))

	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)

	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])

	/* Not a suitable memory address.  */

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))

    memcpy (saved_order, order, sizeof order);

  for (i = 0; i < nops; i++)

      regs[i] = unsorted_regs[check_regs ? order[i] : i];

	reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];

  *load_offset = unsorted_offsets[order[0]];

      && !peep2_reg_dead_p (nops_total, base_reg_rtx))

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */

  if (!multiple_operation_profitable_p (false, nops, 0))
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base ...  */
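
/* For illustration only: with COUNT == 2, REGS == {4, 5} and
   WBACK_OFFSET == 8, the parallel built below has the shape

     (parallel [(set basereg (plus basereg (const_int 8)))
		(set (reg:SI 4) mems[0])
		(set (reg:SI 5) mems[1])])

   i.e. the optional base-register update occupies element 0.  */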
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)

  if (!multiple_operation_profitable_p (false, count, 0))

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)

      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base ...  */
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)

      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the ...
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)

  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)

      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);

    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);

  return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				   write_back ? 4 * count : 0);

arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)

  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,

arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)

  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively

   Returns true iff we could generate a new instruction.  */
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)

  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;

  HOST_WIDE_INT offset;
  int write_back = FALSE;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);

      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));

      if (!TARGET_THUMB1)
	base_reg_rtx = newbase;

  for (i = 0; i < nops; i++)

      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],

  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */
gen_stm_seq (rtx *operands, int nops)

  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];

  HOST_WIDE_INT offset;
  int write_back = FALSE;

  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);

      gcc_assert (base_reg_dies);

      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)

      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],

  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */
gen_const_stm_seq (rtx *operands, int nops)

  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];

  HOST_WIDE_INT offset;
  int write_back = FALSE;

  bool base_reg_dies;

  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free ...  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)

      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])

	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);

	    regs[i] = REGNO (t);

  /* Compute an ordering that maps the register numbers to an ascending ...  */
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])

  for (i = 1; i < nops; i++)

      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))

      reg_order[i] = this_order;

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)

      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))

  /* Load the constants.  */
  for (i = 0; i < nops; i++)

      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);

      gcc_assert (base_reg_dies);

      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)

      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],

  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
/* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
   unaligned copies on processors which support unaligned semantics for those
   instructions.  INTERLEAVE_FACTOR can be used to attempt to hide load latency
   (using more registers) by doing e.g. load/load/store/store for a factor of 2.
   An interleave factor of 1 (the minimum) will perform no interleaving.
   Load/store multiple are used for aligned addresses where possible.  */
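
/* To make the interleaving concrete (illustration only): with
   INTERLEAVE_FACTOR == 2, each iteration of the main copy loop below moves an
   8-byte chunk as load/load/store/store (or as a two-register ldm/stm with
   write-back when the corresponding side is word-aligned), whereas a factor
   of 1 simply alternates a single load with a single store.  */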
arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
				   HOST_WIDE_INT length,
				   unsigned int interleave_factor)

  rtx *regs = XALLOCAVEC (rtx, interleave_factor);
  int *regnos = XALLOCAVEC (int, interleave_factor);
  HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
  HOST_WIDE_INT i, j;
  HOST_WIDE_INT remaining = length, words;
  rtx halfword_tmp = NULL, byte_tmp = NULL;

  bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
  bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
  HOST_WIDE_INT srcoffset, dstoffset;
  HOST_WIDE_INT src_autoinc, dst_autoinc;

  gcc_assert (1 <= interleave_factor && interleave_factor <= 4);

  /* Use hard registers if we have aligned source or destination so we can use
     load/store multiple with contiguous registers.  */
  if (dst_aligned || src_aligned)
    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_rtx_REG (SImode, i);

    for (i = 0; i < interleave_factor; i++)
      regs[i] = gen_reg_rtx (SImode);

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  src = copy_addr_to_reg (XEXP (srcbase, 0));

  srcoffset = dstoffset = 0;

  /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
     For copying the last bytes we want to subtract this offset again.  */
  src_autoinc = dst_autoinc = 0;

  for (i = 0; i < interleave_factor; i++)

  /* Copy BLOCK_SIZE_BYTES chunks.  */

  for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)

      if (src_aligned && interleave_factor > 1)

	  emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
					    TRUE, srcbase, &srcoffset));
	  src_autoinc += UNITS_PER_WORD * interleave_factor;

	  for (j = 0; j < interleave_factor; j++)

	      addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD

	      mem = adjust_automodify_address (srcbase, SImode, addr,
					       srcoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_loadsi (regs[j], mem));

	  srcoffset += block_size_bytes;

      if (dst_aligned && interleave_factor > 1)

	  emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
					     TRUE, dstbase, &dstoffset));
	  dst_autoinc += UNITS_PER_WORD * interleave_factor;

	  for (j = 0; j < interleave_factor; j++)

	      addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD

	      mem = adjust_automodify_address (dstbase, SImode, addr,
					       dstoffset + j * UNITS_PER_WORD);
	      emit_insn (gen_unaligned_storesi (mem, regs[j]));

	  dstoffset += block_size_bytes;

      remaining -= block_size_bytes;

  /* Copy any whole words left (note these aren't interleaved with any
     subsequent halfword/byte load/stores in the interests of simplicity).  */

  words = remaining / UNITS_PER_WORD;

  gcc_assert (words < interleave_factor);

  if (src_aligned && words > 1)

      emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,

      src_autoinc += UNITS_PER_WORD * words;

      for (j = 0; j < words; j++)

	  addr = plus_constant (Pmode, src,
				srcoffset + j * UNITS_PER_WORD - src_autoinc);
	  mem = adjust_automodify_address (srcbase, SImode, addr,
					   srcoffset + j * UNITS_PER_WORD);

	    emit_move_insn (regs[j], mem);

	    emit_insn (gen_unaligned_loadsi (regs[j], mem));

      srcoffset += words * UNITS_PER_WORD;

  if (dst_aligned && words > 1)

      emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,

      dst_autoinc += words * UNITS_PER_WORD;

      for (j = 0; j < words; j++)

	  addr = plus_constant (Pmode, dst,
				dstoffset + j * UNITS_PER_WORD - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, SImode, addr,
					   dstoffset + j * UNITS_PER_WORD);

	    emit_move_insn (mem, regs[j]);

	    emit_insn (gen_unaligned_storesi (mem, regs[j]));

      dstoffset += words * UNITS_PER_WORD;

  remaining -= words * UNITS_PER_WORD;

  gcc_assert (remaining < 4);

  /* Copy a halfword if necessary.  */

  if (remaining >= 2)

      halfword_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
      emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));

      /* Either write out immediately, or delay until we've loaded the last
	 byte, depending on interleave factor.  */
      if (interleave_factor == 1)

	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
	  emit_insn (gen_unaligned_storehi (mem,
					    gen_lowpart (HImode, halfword_tmp)));
	  halfword_tmp = NULL;

  gcc_assert (remaining < 2);

  /* Copy last byte.  */

  if ((remaining & 1) != 0)

      byte_tmp = gen_reg_rtx (SImode);

      addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
      mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
      emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);

      if (interleave_factor == 1)

	  addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
	  mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));

  /* Store last halfword if we haven't done so already.  */

      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
      emit_insn (gen_unaligned_storehi (mem,
					gen_lowpart (HImode, halfword_tmp)));

  /* Likewise for last byte.  */

      addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
      mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
      emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));

  gcc_assert (remaining == 0 && srcoffset == dstoffset);
/* From mips_adjust_block_mem:

   Helper function for doing a loop-based block operation on memory
   reference MEM.  Each iteration of the loop will operate on LENGTH
   bytes of MEM.

   Create a new base register for use within the loop and point it to
   the start of MEM.  Create a new memory reference that uses this
   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,

  *loop_reg = copy_addr_to_reg (XEXP (mem, 0));

  /* Although the new mem does not refer to a known location,
     it does keep up to LENGTH bytes of alignment.  */
  *loop_mem = change_address (mem, BLKmode, *loop_reg);
  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
/* From mips_block_move_loop:

   Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
   the memory regions do not overlap.  */
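
/* Sketch of the emitted structure (illustrative):

     <set up src_reg/dest_reg and final_src>
   label:
     <straight copy of BYTES_PER_ITER bytes>
     src_reg += BYTES_PER_ITER; dest_reg += BYTES_PER_ITER;
     if (src_reg != final_src) goto label;
     <straight copy of the leftover LENGTH % BYTES_PER_ITER bytes>  */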
arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
			       unsigned int interleave_factor,
			       HOST_WIDE_INT bytes_per_iter)

  rtx src_reg, dest_reg, final_src, test;
  HOST_WIDE_INT leftover;

  leftover = length % bytes_per_iter;
  length -= leftover;

  /* Create registers and memory references for use within the loop.  */
  arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
  arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);

  /* Calculate the value that SRC_REG should have after the last iteration of
     the loop.  */
  final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
				   0, 0, OPTAB_WIDEN);

  /* Emit the start of the loop.  */
  rtx_code_label *label = gen_label_rtx ();
  emit_label (label);

  /* Emit the loop body.  */
  arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
				     interleave_factor);

  /* Move on to the next block.  */
  emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
  emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));

  /* Emit the loop condition.  */
  test = gen_rtx_NE (VOIDmode, src_reg, final_src);
  emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));

  /* Mop up any left-over bytes.  */

  arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
/* Emit a block move when either the source or destination is unaligned (not
   aligned to a four-byte boundary).  This may need further tuning depending on
   core type, optimize_size setting, etc.  */
arm_movmemqi_unaligned (rtx *operands)

  HOST_WIDE_INT length = INTVAL (operands[2]);

      bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
      bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
      /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
	 size of code if optimizing for size.  We'll use ldm/stm if src_aligned
	 or dst_aligned though: allow more interleaving in those cases since the
	 resulting code can be smaller.  */
      unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
      HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;

	arm_block_move_unaligned_loop (operands[0], operands[1], length,
				       interleave_factor, bytes_per_iter);

	arm_block_move_unaligned_straight (operands[0], operands[1], length,
					   interleave_factor);

      /* Note that the loop created by arm_block_move_unaligned_loop may be
	 subject to loop unrolling, which makes tuning this condition a little
	 ...  */
	arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);

	arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
arm_gen_movmemqi (rtx *operands)

  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;

  if (!CONST_INT_P (operands[2])
      || !CONST_INT_P (operands[3])
      || INTVAL (operands[2]) > 64)

  if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
    return arm_movmemqi_unaligned (operands);

  if (INTVAL (operands[3]) & 3)

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  while (in_words_to_go >= 2)

      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));

	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,

      if (out_words_to_go)

	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,

					       dstbase, &dstoffset));

	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
	      if (last_bytes != 0)

		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);

      gcc_assert (!in_words_to_go);	/* Sanity check */

  if (in_words_to_go)

      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)

      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (Pmode, dst,

					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;

      if (last_bytes > 1)

	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));

	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;

	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
/* Helper for gen_movmem_ldrd_strd.  Increase the address of memory rtx ...  */
next_consecutive_mem (rtx mem)

  machine_mode mode = GET_MODE (mem);
  HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
  rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);

  return adjust_automodify_address (mem, mode, addr, offset);
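
/* For example (illustrative only), given a SImode MEM whose address is
   (plus (reg r0) (const_int 4)), the result is a SImode MEM at
   (plus (reg r0) (const_int 8)), i.e. the next word in memory.  */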
/* Copy using LDRD/STRD instructions whenever possible.
   Returns true upon success.  */
gen_movmem_ldrd_strd (rtx *operands)

  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;

  bool src_aligned, dst_aligned;
  bool src_volatile, dst_volatile;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */

  dst_volatile = MEM_VOLATILE_P (dst);
  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  src_volatile = MEM_VOLATILE_P (src);
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  if (!unaligned_access && !(src_aligned && dst_aligned))

  if (src_volatile || dst_volatile)

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands);

  /* If either src or dst is unaligned we'll be accessing it as pairs
     of unaligned SImode accesses.  Otherwise we can generate DImode
     ldrd/strd instructions.  */
  src = adjust_address (src, src_aligned ? DImode : SImode, 0);
  dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);

      reg0 = gen_reg_rtx (DImode);
      rtx low_reg = NULL_RTX;
      rtx hi_reg = NULL_RTX;

      if (!src_aligned || !dst_aligned)

	  low_reg = gen_lowpart (SImode, reg0);
	  hi_reg = gen_highpart_mode (SImode, DImode, reg0);

	emit_move_insn (reg0, src);

	  emit_insn (gen_unaligned_loadsi (low_reg, src));
	  src = next_consecutive_mem (src);
	  emit_insn (gen_unaligned_loadsi (hi_reg, src));

	emit_move_insn (dst, reg0);

	  emit_insn (gen_unaligned_storesi (dst, low_reg));
	  dst = next_consecutive_mem (dst);
	  emit_insn (gen_unaligned_storesi (dst, hi_reg));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);

  gcc_assert (len < 8);

      /* More than a word but less than a double-word to copy.  Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);

	emit_move_insn (reg0, src);

	emit_insn (gen_unaligned_loadsi (reg0, src));

	emit_move_insn (dst, reg0);

	emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);

  /* Copy the remaining bytes.  */

      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);

	emit_insn (gen_zero_extendhisi2 (reg0, src));

	emit_insn (gen_unaligned_loadhiu (reg0, src));

	emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));

	emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);

      dst = adjust_address (dst, QImode, 0);
      src = adjust_address (src, QImode, 0);
      reg0 = gen_reg_rtx (QImode);
      emit_move_insn (reg0, src);
      emit_move_insn (dst, reg0);
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
     COND_OR == DOM_CC_X_AND_Y => (X && Y)
     COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
     COND_OR == DOM_CC_X_OR_Y  => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
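
/* For example (illustration): if X and Y are both EQ comparisons and
   COND_OR is DOM_CC_X_AND_Y, the switch below yields CC_DEQmode, i.e. the
   "both must be equal" dominance mode.  */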
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)

  enum rtx_code cond1, cond2;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))

      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */

      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))

    std::swap (cond1, cond2);

      if (cond_or == DOM_CC_X_AND_Y)

	  case EQ: return CC_DEQmode;
	  case LE: return CC_DLEmode;
	  case LEU: return CC_DLEUmode;
	  case GE: return CC_DGEmode;
	  case GEU: return CC_DGEUmode;
	  default: gcc_unreachable ();

      if (cond_or == DOM_CC_X_AND_Y)

      gcc_unreachable ();

      if (cond_or == DOM_CC_X_AND_Y)

      gcc_unreachable ();

      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

	  return CC_DLTUmode;

	  return CC_DLEUmode;

	  gcc_unreachable ();

      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

	  return CC_DGTUmode;

	  return CC_DGEUmode;

	  gcc_unreachable ();

      /* The remaining cases only occur when both comparisons are the
	 same.  */

      gcc_assert (cond1 == cond2);

      gcc_assert (cond1 == cond2);

      gcc_assert (cond1 == cond2);

      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

      gcc_unreachable ();
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)

  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)

	  gcc_unreachable ();

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && MEM_P (SUBREG_REG (XEXP (x, 0)))
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && CONST_INT_P (y))

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)

      /* A DImode comparison against zero can be implemented by
	 or'ing the two halves together.  */
      if (y == const0_rtx)

      /* We can do an equality test in three Thumb instructions.  */

      /* DImode unsigned comparisons can be implemented by cmp +
	 cmpeq without a scratch register.  Not worth doing in ...  */

      /* DImode signed and unsigned comparisons can be implemented
	 by cmp + sbcs with a scratch register, but that does not
	 set the Z flag - we must reverse GT/LE/GTU/LEU.  */
      gcc_assert (op != EQ && op != NE);

	  gcc_unreachable ();

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return GET_MODE (x);
14776 /* X and Y are two things to compare using CODE. Emit the compare insn and
14777 return the rtx for register 0 in the proper mode. FP means this is a
14778 floating point compare: I don't think that it is needed on the arm. */
14780 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14784 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14786 /* We might have X as a constant, Y as a register because of the predicates
14787 used for cmpdi. If so, force X to a register here. */
14788 if (dimode_comparison && !REG_P (x))
14789 x = force_reg (DImode, x);
14791 mode = SELECT_CC_MODE (code, x, y);
14792 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14794 if (dimode_comparison
14795 && mode != CC_CZmode)
14799 /* To compare two non-zero values for equality, XOR them and
14800 then compare against zero. Not used for ARM mode; there
14801 CC_CZmode is cheaper. */
14802 if (mode == CC_Zmode && y != const0_rtx)
14804 gcc_assert (!reload_completed);
14805 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
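/* Illustrative note (an assumption, not taken from the elided code): a Thumb
   DImode test such as (a == b) with b nonzero is rewritten here as
   t = a ^ b, after which the comparison against zero only needs the Z flag,
   so the two 32-bit halves of t can simply be ORed together.  */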
14809 /* A scratch register is required. */
14810 if (reload_completed)
14811 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14813 scratch = gen_rtx_SCRATCH (SImode);
14815 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14816 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14817 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14820 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14825 /* Generate a sequence of insns that will generate the correct return
14826 address mask depending on the physical architecture that the program
14829 arm_gen_return_addr_mask (void)
14831 rtx reg
= gen_reg_rtx (Pmode
);
14833 emit_insn (gen_return_addr_mask (reg
));
14838 arm_reload_in_hi (rtx
*operands
)
14840 rtx ref
= operands
[1];
14842 HOST_WIDE_INT offset
= 0;
14844 if (GET_CODE (ref
) == SUBREG
)
14846 offset
= SUBREG_BYTE (ref
);
14847 ref
= SUBREG_REG (ref
);
14852 /* We have a pseudo which has been spilt onto the stack; there
14853 are two cases here: the first where there is a simple
14854 stack-slot replacement and a second where the stack-slot is
14855 out of range, or is used as a subreg. */
14856 if (reg_equiv_mem (REGNO (ref
)))
14858 ref
= reg_equiv_mem (REGNO (ref
));
14859 base
= find_replacement (&XEXP (ref
, 0));
14862 /* The slot is out of range, or was dressed up in a SUBREG. */
14863 base
= reg_equiv_address (REGNO (ref
));
14865 /* PR 62554: If there is no equivalent memory location then just move
14866 the value as an SImode register move. This happens when the target
14867 architecture variant does not have an HImode register move. */
14870 gcc_assert (REG_P (operands
[0]));
14871 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14872 gen_rtx_SUBREG (SImode
, ref
, 0)));
14877 base = find_replacement (&XEXP (ref, 0));
14879 /* Handle the case where the address is too complex to be offset by 1. */
14880 if (GET_CODE (base) == MINUS
14881 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14883 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14885 emit_set_insn (base_plus, base);
14888 else if (GET_CODE (base) == PLUS)
14890 /* The addend must be CONST_INT, or we would have dealt with it above. */
14891 HOST_WIDE_INT hi, lo;
14893 offset += INTVAL (XEXP (base, 1));
14894 base = XEXP (base, 0);
14896 /* Rework the address into a legal sequence of insns. */
14897 /* Valid range for lo is -4095 -> 4095 */
14900 : -((-offset) & 0xfff));
14902 /* Corner case, if lo is the max offset then we would be out of range
14903 once we have added the additional 1 below, so bump the msb into the
14904 pre-loading insn(s). */
14908 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14909 ^ (HOST_WIDE_INT) 0x80000000)
14910 - (HOST_WIDE_INT) 0x80000000);
14912 gcc_assert (hi + lo == offset);
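/* Worked example (illustrative only): for offset 0x12345 the code above
   picks lo = 0x345 and hi = 0x12000; for offset -0x1001 it picks lo = -0x1
   and hi = -0x1000.  In both cases hi + lo == offset and lo stays within the
   -4095..4095 immediate range.  */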
14916 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14918 /* Get the base address; addsi3 knows how to handle constants
14919 that require more than one insn. */
14920 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14926 /* Operands[2] may overlap operands[0] (though it won't overlap
14927 operands[1]), that's why we asked for a DImode reg -- so we can
14928 use the bit that does not overlap. */
14929 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14930 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14932 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14934 emit_insn (gen_zero_extendqisi2 (scratch
,
14935 gen_rtx_MEM (QImode
,
14936 plus_constant (Pmode
, base
,
14938 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14939 gen_rtx_MEM (QImode
,
14940 plus_constant (Pmode
, base
,
14942 if (!BYTES_BIG_ENDIAN
)
14943 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14944 gen_rtx_IOR (SImode
,
14947 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14951 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14952 gen_rtx_IOR (SImode
,
14953 gen_rtx_ASHIFT (SImode
, scratch
,
14955 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14958 /* Handle storing a half-word to memory during reload by synthesizing as two
14959 byte stores. Take care not to clobber the input values until after we
14960 have moved them somewhere safe. This code assumes that if the DImode
14961 scratch in operands[2] overlaps either the input value or output address
14962 in some way, then that value must die in this insn (we absolutely need
14963 two scratch registers for some corner cases). */
14965 arm_reload_out_hi (rtx
*operands
)
14967 rtx ref
= operands
[0];
14968 rtx outval
= operands
[1];
14970 HOST_WIDE_INT offset
= 0;
14972 if (GET_CODE (ref
) == SUBREG
)
14974 offset
= SUBREG_BYTE (ref
);
14975 ref
= SUBREG_REG (ref
);
14980 /* We have a pseudo which has been spilt onto the stack; there
14981 are two cases here: the first where there is a simple
14982 stack-slot replacement and a second where the stack-slot is
14983 out of range, or is used as a subreg. */
14984 if (reg_equiv_mem (REGNO (ref
)))
14986 ref
= reg_equiv_mem (REGNO (ref
));
14987 base
= find_replacement (&XEXP (ref
, 0));
14990 /* The slot is out of range, or was dressed up in a SUBREG. */
14991 base
= reg_equiv_address (REGNO (ref
));
14993 /* PR 62254: If there is no equivalent memory location then just move
14994 the value as an SImode register move. This happens when the target
14995 architecture variant does not have an HImode register move. */
14998 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
15000 if (REG_P (outval
))
15002 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15003 gen_rtx_SUBREG (SImode
, outval
, 0)));
15005 else /* SUBREG_P (outval) */
15007 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
15008 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15009 SUBREG_REG (outval
)));
15011 /* FIXME: Handle other cases ? */
15012 gcc_unreachable ();
15018 base
= find_replacement (&XEXP (ref
, 0));
15020 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15022 /* Handle the case where the address is too complex to be offset by 1. */
15023 if (GET_CODE (base
) == MINUS
15024 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15026 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15028 /* Be careful not to destroy OUTVAL. */
15029 if (reg_overlap_mentioned_p (base_plus
, outval
))
15031 /* Updating base_plus might destroy outval, see if we can
15032 swap the scratch and base_plus. */
15033 if (!reg_overlap_mentioned_p (scratch
, outval
))
15034 std::swap (scratch
, base_plus
);
15037 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15039 /* Be conservative and copy OUTVAL into the scratch now,
15040 this should only be necessary if outval is a subreg
15041 of something larger than a word. */
15042 /* XXX Might this clobber base? I can't see how it can,
15043 since scratch is known to overlap with OUTVAL, and
15044 must be wider than a word. */
15045 emit_insn (gen_movhi (scratch_hi
, outval
));
15046 outval
= scratch_hi
;
15050 emit_set_insn (base_plus
, base
);
15053 else if (GET_CODE (base
) == PLUS
)
15055 /* The addend must be CONST_INT, or we would have dealt with it above. */
15056 HOST_WIDE_INT hi
, lo
;
15058 offset
+= INTVAL (XEXP (base
, 1));
15059 base
= XEXP (base
, 0);
15061 /* Rework the address into a legal sequence of insns. */
15062 /* Valid range for lo is -4095 -> 4095 */
15065 : -((-offset
) & 0xfff));
15067 /* Corner case, if lo is the max offset then we would be out of range
15068 once we have added the additional 1 below, so bump the msb into the
15069 pre-loading insn(s). */
15073 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15074 ^ (HOST_WIDE_INT
) 0x80000000)
15075 - (HOST_WIDE_INT
) 0x80000000);
15077 gcc_assert (hi
+ lo
== offset
);
15081 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15083 /* Be careful not to destroy OUTVAL. */
15084 if (reg_overlap_mentioned_p (base_plus
, outval
))
15086 /* Updating base_plus might destroy outval, see if we
15087 can swap the scratch and base_plus. */
15088 if (!reg_overlap_mentioned_p (scratch
, outval
))
15089 std::swap (scratch
, base_plus
);
15092 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15094 /* Be conservative and copy outval into scratch now,
15095 this should only be necessary if outval is a
15096 subreg of something larger than a word. */
15097 /* XXX Might this clobber base? I can't see how it
15098 can, since scratch is known to overlap with
15100 emit_insn (gen_movhi (scratch_hi
, outval
));
15101 outval
= scratch_hi
;
15105 /* Get the base address; addsi3 knows how to handle constants
15106 that require more than one insn. */
15107 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15113 if (BYTES_BIG_ENDIAN
)
15115 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15116 plus_constant (Pmode
, base
,
15118 gen_lowpart (QImode
, outval
)));
15119 emit_insn (gen_lshrsi3 (scratch
,
15120 gen_rtx_SUBREG (SImode
, outval
, 0),
15122 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15124 gen_lowpart (QImode
, scratch
)));
15128 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15130 gen_lowpart (QImode
, outval
)));
15131 emit_insn (gen_lshrsi3 (scratch
,
15132 gen_rtx_SUBREG (SImode
, outval
, 0),
15134 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15135 plus_constant (Pmode
, base
,
15137 gen_lowpart (QImode
, scratch
)));
15141 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15142 (padded to the size of a word) should be passed in a register. */
15145 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15147 if (TARGET_AAPCS_BASED)
15148 return must_pass_in_stack_var_size (mode, type);
15150 return must_pass_in_stack_var_size_or_pad (mode, type);
15154 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15155 Return true if an argument passed on the stack should be padded upwards,
15156 i.e. if the least-significant byte has useful data.
15157 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15158 aggregate types are placed in the lowest memory address. */
15161 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15163 if (!TARGET_AAPCS_BASED)
15164 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15166 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15173 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15174 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15175 register has useful data, and return the opposite if the most
15176 significant byte does. */
15179 arm_pad_reg_upward (machine_mode mode,
15180 tree type, int first ATTRIBUTE_UNUSED)
15182 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15184 /* For AAPCS, small aggregates, small fixed-point types,
15185 and small complex types are always padded upwards. */
15188 if ((AGGREGATE_TYPE_P (type)
15189 || TREE_CODE (type) == COMPLEX_TYPE
15190 || FIXED_POINT_TYPE_P (type))
15191 && int_size_in_bytes (type) <= 4)
15196 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15197 && GET_MODE_SIZE (mode) <= 4)
15202 /* Otherwise, use default padding. */
15203 return !BYTES_BIG_ENDIAN;
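/* Example (illustrative only): on a big-endian AAPCS target a 3-byte
   structure passed in a core register takes the branch above and is padded
   upwards, whereas a plain 'short' falls through to the !BYTES_BIG_ENDIAN
   default.  */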
15206 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15207 assuming that the address in the base register is word aligned. */
15209 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15211 HOST_WIDE_INT max_offset;
15213 /* Offset must be a multiple of 4 in Thumb mode. */
15214 if (TARGET_THUMB2 && ((offset & 3) != 0))
15219 else if (TARGET_ARM)
15224 return ((offset <= max_offset) && (offset >= -max_offset));
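/* A minimal sketch of the same check as a standalone predicate, assuming the
   usual LDRD/STRD immediate ranges (ARM: +/-255; Thumb-2: +/-1020 and a
   multiple of 4).  Illustrative only; the bounds are not taken from the
   elided code above.

     static bool
     ldrd_offset_in_range_p (HOST_WIDE_INT offset, bool thumb2_p)
     {
       if (thumb2_p)
         return (offset & 3) == 0 && offset >= -1020 && offset <= 1020;
       return offset >= -255 && offset <= 255;
     }
*/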
15227 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15228 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15229 Assumes that the address in the base register RN is word aligned. Pattern
15230 guarantees that both memory accesses use the same base register,
15231 the offsets are constants within the range, and the gap between the offsets is 4.
15232 If reload is complete then check that the registers are legal. WBACK indicates whether
15233 address is updated. LOAD indicates whether memory access is load or store. */
15235 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15236 bool wback
, bool load
)
15238 unsigned int t
, t2
, n
;
15240 if (!reload_completed
)
15243 if (!offset_ok_for_ldrd_strd (offset
))
15250 if ((TARGET_THUMB2
)
15251 && ((wback
&& (n
== t
|| n
== t2
))
15252 || (t
== SP_REGNUM
)
15253 || (t
== PC_REGNUM
)
15254 || (t2
== SP_REGNUM
)
15255 || (t2
== PC_REGNUM
)
15256 || (!load
&& (n
== PC_REGNUM
))
15257 || (load
&& (t
== t2
))
15258 /* Triggers Cortex-M3 LDRD errata. */
15259 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15263 && ((wback
&& (n
== t
|| n
== t2
))
15264 || (t2
== PC_REGNUM
)
15265 || (t
% 2 != 0) /* First destination register is not even. */
15267 /* PC can be used as base register (for offset addressing only),
15268 but it is deprecated. */
15269 || (n
== PC_REGNUM
)))
15275 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15276 operand MEM's address contains an immediate offset from the base
15277 register and has no side effects, in which case it sets BASE and
15278 OFFSET accordingly. */
15280 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15284 gcc_assert (base != NULL && offset != NULL);
15286 /* TODO: Handle more general memory operand patterns, such as
15287 PRE_DEC and PRE_INC. */
15289 if (side_effects_p (mem))
15292 /* Can't deal with subregs. */
15293 if (GET_CODE (mem) == SUBREG)
15296 gcc_assert (MEM_P (mem));
15298 *offset = const0_rtx;
15300 addr = XEXP (mem, 0);
15302 /* If addr isn't valid for DImode, then we can't handle it. */
15303 if (!arm_legitimate_address_p (DImode, addr,
15304 reload_in_progress || reload_completed))
15312 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15314 *base = XEXP (addr, 0);
15315 *offset = XEXP (addr, 1);
15316 return (REG_P (*base) && CONST_INT_P (*offset));
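/* Examples (illustrative only): addresses of the form (reg rN) and
   (plus (reg rN) (const_int 8)) are accepted, with *base and *offset set
   accordingly; auto-modify forms such as (post_inc (reg rN)) are rejected by
   the side_effects_p check above.  */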
15322 /* Called from a peephole2 to replace two word-size accesses with a
15323 single LDRD/STRD instruction. Returns true iff we can generate a
15324 new instruction sequence. That is, both accesses use the same base
15325 register and the gap between constant offsets is 4. This function
15326 may reorder its operands to match ldrd/strd RTL templates.
15327 OPERANDS are the operands found by the peephole matcher;
15328 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15329 corresponding memory operands. LOAD indicates whether the access
15330 is load or store. CONST_STORE indicates a store of constant
15331 integer values held in OPERANDS[4,5] and assumes that the pattern
15332 is of length 4 insn, for the purpose of checking dead registers.
15333 COMMUTE indicates that register operands may be reordered. */
15335 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15336 bool const_store
, bool commute
)
15339 HOST_WIDE_INT offsets
[2], offset
;
15340 rtx base
= NULL_RTX
;
15341 rtx cur_base
, cur_offset
, tmp
;
15343 HARD_REG_SET regset
;
15345 gcc_assert (!const_store
|| !load
);
15346 /* Check that the memory references are immediate offsets from the
15347 same base register. Extract the base register, the destination
15348 registers, and the corresponding memory offsets. */
15349 for (i
= 0; i
< nops
; i
++)
15351 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15356 else if (REGNO (base
) != REGNO (cur_base
))
15359 offsets
[i
] = INTVAL (cur_offset
);
15360 if (GET_CODE (operands
[i
]) == SUBREG
)
15362 tmp
= SUBREG_REG (operands
[i
]);
15363 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15368 /* Make sure there is no dependency between the individual loads. */
15369 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15370 return false; /* RAW */
15372 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15373 return false; /* WAW */
15375 /* If the same input register is used in both stores
15376 when storing different constants, try to find a free register.
15377 For example, the code
15382 can be transformed into
15386 in Thumb mode assuming that r1 is free.
15387 For ARM mode do the same but only if the starting register
15388 can be made to be even. */
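/* An illustrative instance of the transformation described above (a sketch,
   not the original example from this file):

       mov   r0, #0              mov   r1, #0
       str   r0, [r2]     ==>    mov   r0, #1
       mov   r0, #1              strd  r1, r0, [r2]
       str   r0, [r2, #4]

   assuming r1 is free at this point.  */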
15390 && REGNO (operands
[0]) == REGNO (operands
[1])
15391 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15395 CLEAR_HARD_REG_SET (regset
);
15396 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15397 if (tmp
== NULL_RTX
)
15400 /* Use the new register in the first load to ensure that
15401 if the original input register is not dead after peephole,
15402 then it will have the correct constant value. */
15405 else if (TARGET_ARM
)
15407 int regno
= REGNO (operands
[0]);
15408 if (!peep2_reg_dead_p (4, operands
[0]))
15410 /* When the input register is even and is not dead after the
15411 pattern, it has to hold the second constant but we cannot
15412 form a legal STRD in ARM mode with this register as the second
15414 if (regno
% 2 == 0)
15417 /* Is regno-1 free? */
15418 SET_HARD_REG_SET (regset
);
15419 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15420 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15421 if (tmp
== NULL_RTX
)
15428 /* Find a DImode register. */
15429 CLEAR_HARD_REG_SET (regset
);
15430 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15431 if (tmp
!= NULL_RTX
)
15433 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15434 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15438 /* Can we use the input register to form a DI register? */
15439 SET_HARD_REG_SET (regset
);
15440 CLEAR_HARD_REG_BIT(regset
,
15441 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15442 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15443 if (tmp
== NULL_RTX
)
15445 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15449 gcc_assert (operands
[0] != NULL_RTX
);
15450 gcc_assert (operands
[1] != NULL_RTX
);
15451 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15452 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15456 /* Make sure the instructions are ordered with lower memory access first. */
15457 if (offsets
[0] > offsets
[1])
15459 gap
= offsets
[0] - offsets
[1];
15460 offset
= offsets
[1];
15462 /* Swap the instructions such that lower memory is accessed first. */
15463 std::swap (operands
[0], operands
[1]);
15464 std::swap (operands
[2], operands
[3]);
15466 std::swap (operands
[4], operands
[5]);
15470 gap
= offsets
[1] - offsets
[0];
15471 offset
= offsets
[0];
15474 /* Make sure accesses are to consecutive memory locations. */
15478 /* Make sure we generate legal instructions. */
15479 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15483 /* In Thumb state, where registers are almost unconstrained, there
15484 is little hope to fix it. */
15488 if (load
&& commute
)
15490 /* Try reordering registers. */
15491 std::swap (operands
[0], operands
[1]);
15492 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15499 /* If input registers are dead after this pattern, they can be
15500 reordered or replaced by other registers that are free in the
15501 current pattern. */
15502 if (!peep2_reg_dead_p (4, operands
[0])
15503 || !peep2_reg_dead_p (4, operands
[1]))
15506 /* Try to reorder the input registers. */
15507 /* For example, the code
15512 can be transformed into
15517 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15520 std::swap (operands
[0], operands
[1]);
15524 /* Try to find a free DI register. */
15525 CLEAR_HARD_REG_SET (regset
);
15526 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15527 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15530 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15531 if (tmp
== NULL_RTX
)
15534 /* DREG must be an even-numbered register in DImode.
15535 Split it into SI registers. */
15536 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15537 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15538 gcc_assert (operands
[0] != NULL_RTX
);
15539 gcc_assert (operands
[1] != NULL_RTX
);
15540 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15541 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15543 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15555 /* Print a symbolic form of X to the debug file, F. */
15557 arm_print_value (FILE *f
, rtx x
)
15559 switch (GET_CODE (x
))
15562 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15566 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15574 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15576 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15577 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15585 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15589 fprintf (f
, "`%s'", XSTR (x
, 0));
15593 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15597 arm_print_value (f
, XEXP (x
, 0));
15601 arm_print_value (f
, XEXP (x
, 0));
15603 arm_print_value (f
, XEXP (x
, 1));
15611 fprintf (f
, "????");
15616 /* Routines for manipulation of the constant pool. */
15618 /* Arm instructions cannot load a large constant directly into a
15619 register; they have to come from a pc relative load. The constant
15620 must therefore be placed in the addressable range of the pc
15621 relative load. Depending on the precise pc relative load
15622 instruction the range is somewhere between 256 bytes and 4k. This
15623 means that we often have to dump a constant inside a function, and
15624 generate code to branch around it.
15626 It is important to minimize this, since the branches will slow
15627 things down and make the code larger.
15629 Normally we can hide the table after an existing unconditional
15630 branch so that there is no interruption of the flow, but in the
15631 worst case the code looks like this:
15649 We fix this by performing a scan after scheduling, which notices
15650 which instructions need to have their operands fetched from the
15651 constant table and builds the table.
15653 The algorithm starts by building a table of all the constants that
15654 need fixing up and all the natural barriers in the function (places
15655 where a constant table can be dropped without breaking the flow).
15656 For each fixup we note how far the pc-relative replacement will be
15657 able to reach and the offset of the instruction into the function.
15659 Having built the table we then group the fixes together to form
15660 tables that are as large as possible (subject to addressing
15661 constraints) and emit each table of constants after the last
15662 barrier that is within range of all the instructions in the group.
15663 If a group does not contain a barrier, then we forcibly create one
15664 by inserting a jump instruction into the flow. Once the table has
15665 been inserted, the insns are then modified to reference the
15666 relevant entry in the pool.
15668 Possible enhancements to the algorithm (not implemented) are:
15670 1) For some processors and object formats, there may be benefit in
15671 aligning the pools to the start of cache lines; this alignment
15672 would need to be taken into account when calculating addressability
15675 /* These typedefs are located at the start of this file, so that
15676 they can be used in the prototypes there. This comment is to
15677 remind readers of that fact so that the following structures
15678 can be understood more easily.
15680 typedef struct minipool_node Mnode;
15681 typedef struct minipool_fixup Mfix; */
15683 struct minipool_node
15685 /* Doubly linked chain of entries. */
15688 /* The maximum offset into the code that this entry can be placed. While
15689 pushing fixes for forward references, all entries are sorted in order
15690 of increasing max_address. */
15691 HOST_WIDE_INT max_address
;
15692 /* Similarly for an entry inserted for a backwards ref. */
15693 HOST_WIDE_INT min_address
;
15694 /* The number of fixes referencing this entry. This can become zero
15695 if we "unpush" an entry. In this case we ignore the entry when we
15696 come to emit the code. */
15698 /* The offset from the start of the minipool. */
15699 HOST_WIDE_INT offset
;
15700 /* The value in table. */
15702 /* The mode of value. */
15704 /* The size of the value. With iWMMXt enabled
15705 sizes > 4 also imply an alignment of 8-bytes. */
15709 struct minipool_fixup
15713 HOST_WIDE_INT address
;
15719 HOST_WIDE_INT forwards
;
15720 HOST_WIDE_INT backwards
;
15723 /* Fixes less than a word need padding out to a word boundary. */
15724 #define MINIPOOL_FIX_SIZE(mode) \
15725 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
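/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4 (sub-word entries are padded out to a word), while
   MINIPOOL_FIX_SIZE (DImode) is 8 and MINIPOOL_FIX_SIZE (TImode) is 16.  */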
15727 static Mnode * minipool_vector_head;
15728 static Mnode * minipool_vector_tail;
15729 static rtx_code_label *minipool_vector_label;
15730 static int minipool_pad;
15732 /* The linked list of all minipool fixes required for this function. */
15733 Mfix * minipool_fix_head;
15734 Mfix * minipool_fix_tail;
15735 /* The fix entry for the current minipool, once it has been placed. */
15736 Mfix * minipool_barrier;
15738 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15739 #define JUMP_TABLES_IN_TEXT_SECTION 0
15742 static HOST_WIDE_INT
15743 get_jump_table_size (rtx_jump_table_data *insn)
15745 /* ADDR_VECs only take room if read-only data goes into the text
15747 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15749 rtx body = PATTERN (insn);
15750 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15751 HOST_WIDE_INT size;
15752 HOST_WIDE_INT modesize;
15754 modesize = GET_MODE_SIZE (GET_MODE (body));
15755 size = modesize * XVECLEN (body, elt);
15759 /* Round up size of TBB table to a halfword boundary. */
15760 size = (size + 1) & ~HOST_WIDE_INT_1;
15763 /* No padding necessary for TBH. */
15766 /* Add two bytes for alignment on Thumb. */
15771 gcc_unreachable ();
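/* Worked example (illustrative only): a TBB-style ADDR_DIFF_VEC in QImode
   with 7 entries gives size 7, rounded up to 8 by the halfword alignment
   above; a TBH-style HImode table with 7 entries gives 14 and needs no
   padding.  */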
15779 /* Return the maximum amount of padding that will be inserted before
15782 static HOST_WIDE_INT
15783 get_label_padding (rtx label)
15785 HOST_WIDE_INT align, min_insn_size;
15787 align = 1 << label_to_alignment (label);
15788 min_insn_size = TARGET_THUMB ? 2 : 4;
15789 return align > min_insn_size ? align - min_insn_size : 0;
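/* Example (illustrative only): for a label aligned to 8 bytes in Thumb code
   (min_insn_size == 2) up to 6 bytes of padding may be inserted; in ARM code
   (min_insn_size == 4) the same label needs at most 4.  */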
15792 /* Move a minipool fix MP from its current location to before MAX_MP.
15793 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15794 constraints may need updating. */
15796 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15797 HOST_WIDE_INT max_address
)
15799 /* The code below assumes these are different. */
15800 gcc_assert (mp
!= max_mp
);
15802 if (max_mp
== NULL
)
15804 if (max_address
< mp
->max_address
)
15805 mp
->max_address
= max_address
;
15809 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15810 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15812 mp
->max_address
= max_address
;
15814 /* Unlink MP from its current position. Since max_mp is non-null,
15815 mp->prev must be non-null. */
15816 mp
->prev
->next
= mp
->next
;
15817 if (mp
->next
!= NULL
)
15818 mp
->next
->prev
= mp
->prev
;
15820 minipool_vector_tail
= mp
->prev
;
15822 /* Re-insert it before MAX_MP. */
15824 mp
->prev
= max_mp
->prev
;
15827 if (mp
->prev
!= NULL
)
15828 mp
->prev
->next
= mp
;
15830 minipool_vector_head
= mp
;
15833 /* Save the new entry. */
15836 /* Scan over the preceding entries and adjust their addresses as
15838 while (mp
->prev
!= NULL
15839 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15841 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15848 /* Add a constant to the minipool for a forward reference. Returns the
15849 node added or NULL if the constant will not fit in this pool. */
15851 add_minipool_forward_ref (Mfix
*fix
)
15853 /* If set, max_mp is the first pool_entry that has a lower
15854 constraint than the one we are trying to add. */
15855 Mnode
* max_mp
= NULL
;
15856 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15859 /* If the minipool starts before the end of FIX->INSN then this FIX
15860 can not be placed into the current pool. Furthermore, adding the
15861 new constant pool entry may cause the pool to start FIX_SIZE bytes
15863 if (minipool_vector_head
&&
15864 (fix
->address
+ get_attr_length (fix
->insn
)
15865 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15868 /* Scan the pool to see if a constant with the same value has
15869 already been added. While we are doing this, also note the
15870 location where we must insert the constant if it doesn't already
15872 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15874 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15875 && fix
->mode
== mp
->mode
15876 && (!LABEL_P (fix
->value
)
15877 || (CODE_LABEL_NUMBER (fix
->value
)
15878 == CODE_LABEL_NUMBER (mp
->value
)))
15879 && rtx_equal_p (fix
->value
, mp
->value
))
15881 /* More than one fix references this entry. */
15883 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15886 /* Note the insertion point if necessary. */
15888 && mp
->max_address
> max_address
)
15891 /* If we are inserting an 8-bytes aligned quantity and
15892 we have not already found an insertion point, then
15893 make sure that all such 8-byte aligned quantities are
15894 placed at the start of the pool. */
15895 if (ARM_DOUBLEWORD_ALIGN
15897 && fix
->fix_size
>= 8
15898 && mp
->fix_size
< 8)
15901 max_address
= mp
->max_address
;
15905 /* The value is not currently in the minipool, so we need to create
15906 a new entry for it. If MAX_MP is NULL, the entry will be put on
15907 the end of the list since the placement is less constrained than
15908 any existing entry. Otherwise, we insert the new fix before
15909 MAX_MP and, if necessary, adjust the constraints on the other
15912 mp
->fix_size
= fix
->fix_size
;
15913 mp
->mode
= fix
->mode
;
15914 mp
->value
= fix
->value
;
15916 /* Not yet required for a backwards ref. */
15917 mp
->min_address
= -65536;
15919 if (max_mp
== NULL
)
15921 mp
->max_address
= max_address
;
15923 mp
->prev
= minipool_vector_tail
;
15925 if (mp
->prev
== NULL
)
15927 minipool_vector_head
= mp
;
15928 minipool_vector_label
= gen_label_rtx ();
15931 mp
->prev
->next
= mp
;
15933 minipool_vector_tail
= mp
;
15937 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15938 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15940 mp
->max_address
= max_address
;
15943 mp
->prev
= max_mp
->prev
;
15945 if (mp
->prev
!= NULL
)
15946 mp
->prev
->next
= mp
;
15948 minipool_vector_head
= mp
;
15951 /* Save the new entry. */
15954 /* Scan over the preceding entries and adjust their addresses as
15956 while (mp
->prev
!= NULL
15957 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15959 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15967 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15968 HOST_WIDE_INT min_address
)
15970 HOST_WIDE_INT offset
;
15972 /* The code below assumes these are different. */
15973 gcc_assert (mp
!= min_mp
);
15975 if (min_mp
== NULL
)
15977 if (min_address
> mp
->min_address
)
15978 mp
->min_address
= min_address
;
15982 /* We will adjust this below if it is too loose. */
15983 mp
->min_address
= min_address
;
15985 /* Unlink MP from its current position. Since min_mp is non-null,
15986 mp->next must be non-null. */
15987 mp
->next
->prev
= mp
->prev
;
15988 if (mp
->prev
!= NULL
)
15989 mp
->prev
->next
= mp
->next
;
15991 minipool_vector_head
= mp
->next
;
15993 /* Reinsert it after MIN_MP. */
15995 mp
->next
= min_mp
->next
;
15997 if (mp
->next
!= NULL
)
15998 mp
->next
->prev
= mp
;
16000 minipool_vector_tail
= mp
;
16006 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16008 mp
->offset
= offset
;
16009 if (mp
->refcount
> 0)
16010 offset
+= mp
->fix_size
;
16012 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16013 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16019 /* Add a constant to the minipool for a backward reference. Returns the
16020 node added or NULL if the constant will not fit in this pool.
16022 Note that the code for insertion for a backwards reference can be
16023 somewhat confusing because the calculated offsets for each fix do
16024 not take into account the size of the pool (which is still under
16027 add_minipool_backward_ref (Mfix
*fix
)
16029 /* If set, min_mp is the last pool_entry that has a lower constraint
16030 than the one we are trying to add. */
16031 Mnode
*min_mp
= NULL
;
16032 /* This can be negative, since it is only a constraint. */
16033 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16036 /* If we can't reach the current pool from this insn, or if we can't
16037 insert this entry at the end of the pool without pushing other
16038 fixes out of range, then we don't try. This ensures that we
16039 can't fail later on. */
16040 if (min_address
>= minipool_barrier
->address
16041 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16042 >= minipool_barrier
->address
))
16045 /* Scan the pool to see if a constant with the same value has
16046 already been added. While we are doing this, also note the
16047 location where we must insert the constant if it doesn't already
16049 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16051 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16052 && fix
->mode
== mp
->mode
16053 && (!LABEL_P (fix
->value
)
16054 || (CODE_LABEL_NUMBER (fix
->value
)
16055 == CODE_LABEL_NUMBER (mp
->value
)))
16056 && rtx_equal_p (fix
->value
, mp
->value
)
16057 /* Check that there is enough slack to move this entry to the
16058 end of the table (this is conservative). */
16059 && (mp
->max_address
16060 > (minipool_barrier
->address
16061 + minipool_vector_tail
->offset
16062 + minipool_vector_tail
->fix_size
)))
16065 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16068 if (min_mp
!= NULL
)
16069 mp
->min_address
+= fix
->fix_size
;
16072 /* Note the insertion point if necessary. */
16073 if (mp
->min_address
< min_address
)
16075 /* For now, we do not allow the insertion of 8-byte alignment
16076 requiring nodes anywhere but at the start of the pool. */
16077 if (ARM_DOUBLEWORD_ALIGN
16078 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16083 else if (mp
->max_address
16084 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16086 /* Inserting before this entry would push the fix beyond
16087 its maximum address (which can happen if we have
16088 re-located a forwards fix); force the new fix to come
16090 if (ARM_DOUBLEWORD_ALIGN
16091 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16096 min_address
= mp
->min_address
+ fix
->fix_size
;
16099 /* Do not insert a non-8-byte aligned quantity before 8-byte
16100 aligned quantities. */
16101 else if (ARM_DOUBLEWORD_ALIGN
16102 && fix
->fix_size
< 8
16103 && mp
->fix_size
>= 8)
16106 min_address
= mp
->min_address
+ fix
->fix_size
;
16111 /* We need to create a new entry. */
16113 mp
->fix_size
= fix
->fix_size
;
16114 mp
->mode
= fix
->mode
;
16115 mp
->value
= fix
->value
;
16117 mp
->max_address
= minipool_barrier
->address
+ 65536;
16119 mp
->min_address
= min_address
;
16121 if (min_mp
== NULL
)
16124 mp
->next
= minipool_vector_head
;
16126 if (mp
->next
== NULL
)
16128 minipool_vector_tail
= mp
;
16129 minipool_vector_label
= gen_label_rtx ();
16132 mp
->next
->prev
= mp
;
16134 minipool_vector_head
= mp
;
16138 mp
->next
= min_mp
->next
;
16142 if (mp
->next
!= NULL
)
16143 mp
->next
->prev
= mp
;
16145 minipool_vector_tail
= mp
;
16148 /* Save the new entry. */
16156 /* Scan over the following entries and adjust their offsets. */
16157 while (mp
->next
!= NULL
)
16159 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16160 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16163 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16165 mp
->next
->offset
= mp
->offset
;
16174 assign_minipool_offsets (Mfix
*barrier
)
16176 HOST_WIDE_INT offset
= 0;
16179 minipool_barrier
= barrier
;
16181 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16183 mp
->offset
= offset
;
16185 if (mp
->refcount
> 0)
16186 offset
+= mp
->fix_size
;
16190 /* Output the literal table */
16192 dump_minipool (rtx_insn
*scan
)
16198 if (ARM_DOUBLEWORD_ALIGN
)
16199 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16200 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16207 fprintf (dump_file
,
16208 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16209 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16211 scan
= emit_label_after (gen_label_rtx (), scan
);
16212 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16213 scan
= emit_label_after (minipool_vector_label
, scan
);
16215 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16217 if (mp
->refcount
> 0)
16221 fprintf (dump_file
,
16222 ";; Offset %u, min %ld, max %ld ",
16223 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16224 (unsigned long) mp
->max_address
);
16225 arm_print_value (dump_file
, mp
->value
);
16226 fputc ('\n', dump_file
);
16229 rtx val
= copy_rtx (mp
->value
);
16231 switch (GET_MODE_SIZE (mp
->mode
))
16233 #ifdef HAVE_consttable_1
16235 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
16239 #ifdef HAVE_consttable_2
16241 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
16245 #ifdef HAVE_consttable_4
16247 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
16251 #ifdef HAVE_consttable_8
16253 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
16257 #ifdef HAVE_consttable_16
16259 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
16264 gcc_unreachable ();
16272 minipool_vector_head
= minipool_vector_tail
= NULL
;
16273 scan
= emit_insn_after (gen_consttable_end (), scan
);
16274 scan
= emit_barrier_after (scan
);
16277 /* Return the cost of forcibly inserting a barrier after INSN. */
16279 arm_barrier_cost (rtx_insn
*insn
)
16281 /* Basing the location of the pool on the loop depth is preferable,
16282 but at the moment, the basic block information seems to be
16283 corrupt by this stage of the compilation. */
16284 int base_cost
= 50;
16285 rtx_insn
*next
= next_nonnote_insn (insn
);
16287 if (next
!= NULL
&& LABEL_P (next
))
16290 switch (GET_CODE (insn
))
16293 /* It will always be better to place the table before the label, rather
16302 return base_cost
- 10;
16305 return base_cost
+ 10;
16309 /* Find the best place in the insn stream in the range
16310 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16311 Create the barrier by inserting a jump and add a new fix entry for
16314 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16316 HOST_WIDE_INT count
= 0;
16317 rtx_barrier
*barrier
;
16318 rtx_insn
*from
= fix
->insn
;
16319 /* The instruction after which we will insert the jump. */
16320 rtx_insn
*selected
= NULL
;
16322 /* The address at which the jump instruction will be placed. */
16323 HOST_WIDE_INT selected_address
;
16325 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16326 rtx_code_label
*label
= gen_label_rtx ();
16328 selected_cost
= arm_barrier_cost (from
);
16329 selected_address
= fix
->address
;
16331 while (from
&& count
< max_count
)
16333 rtx_jump_table_data
*tmp
;
16336 /* This code shouldn't have been called if there was a natural barrier
16338 gcc_assert (!BARRIER_P (from
));
16340 /* Count the length of this insn. This must stay in sync with the
16341 code that pushes minipool fixes. */
16342 if (LABEL_P (from
))
16343 count
+= get_label_padding (from
);
16345 count
+= get_attr_length (from
);
16347 /* If there is a jump table, add its length. */
16348 if (tablejump_p (from
, NULL
, &tmp
))
16350 count
+= get_jump_table_size (tmp
);
16352 /* Jump tables aren't in a basic block, so base the cost on
16353 the dispatch insn. If we select this location, we will
16354 still put the pool after the table. */
16355 new_cost
= arm_barrier_cost (from
);
16357 if (count
< max_count
16358 && (!selected
|| new_cost
<= selected_cost
))
16361 selected_cost
= new_cost
;
16362 selected_address
= fix
->address
+ count
;
16365 /* Continue after the dispatch table. */
16366 from
= NEXT_INSN (tmp
);
16370 new_cost
= arm_barrier_cost (from
);
16372 if (count
< max_count
16373 && (!selected
|| new_cost
<= selected_cost
))
16376 selected_cost
= new_cost
;
16377 selected_address
= fix
->address
+ count
;
16380 from
= NEXT_INSN (from
);
16383 /* Make sure that we found a place to insert the jump. */
16384 gcc_assert (selected
);
16386 /* Make sure we do not split a call and its corresponding
16387 CALL_ARG_LOCATION note. */
16388 if (CALL_P (selected
))
16390 rtx_insn
*next
= NEXT_INSN (selected
);
16391 if (next
&& NOTE_P (next
)
16392 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16396 /* Create a new JUMP_INSN that branches around a barrier. */
16397 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16398 JUMP_LABEL (from
) = label
;
16399 barrier
= emit_barrier_after (from
);
16400 emit_label_after (label
, barrier
);
16402 /* Create a minipool barrier entry for the new barrier. */
16403 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16404 new_fix
->insn
= barrier
;
16405 new_fix
->address
= selected_address
;
16406 new_fix
->next
= fix
->next
;
16407 fix
->next
= new_fix
;
16412 /* Record that there is a natural barrier in the insn stream at
16415 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16417 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16420 fix
->address
= address
;
16423 if (minipool_fix_head
!= NULL
)
16424 minipool_fix_tail
->next
= fix
;
16426 minipool_fix_head
= fix
;
16428 minipool_fix_tail
= fix
;
16431 /* Record INSN, which will need fixing up to load a value from the
16432 minipool. ADDRESS is the offset of the insn since the start of the
16433 function; LOC is a pointer to the part of the insn which requires
16434 fixing; VALUE is the constant that must be loaded, which is of type
16437 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16438 machine_mode mode
, rtx value
)
16440 gcc_assert (!arm_disable_literal_pool
);
16441 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16444 fix
->address
= address
;
16447 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16448 fix
->value
= value
;
16449 fix
->forwards
= get_attr_pool_range (insn
);
16450 fix
->backwards
= get_attr_neg_pool_range (insn
);
16451 fix
->minipool
= NULL
;
16453 /* If an insn doesn't have a range defined for it, then it isn't
16454 expecting to be reworked by this code. Better to stop now than
16455 to generate duff assembly code. */
16456 gcc_assert (fix
->forwards
|| fix
->backwards
);
16458 /* If an entry requires 8-byte alignment then assume all constant pools
16459 require 4 bytes of padding. Trying to do this later on a per-pool
16460 basis is awkward because existing pool entries have to be modified. */
16461 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16466 fprintf (dump_file
,
16467 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16468 GET_MODE_NAME (mode
),
16469 INSN_UID (insn
), (unsigned long) address
,
16470 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16471 arm_print_value (dump_file
, fix
->value
);
16472 fprintf (dump_file
, "\n");
16475 /* Add it to the chain of fixes. */
16478 if (minipool_fix_head
!= NULL
)
16479 minipool_fix_tail
->next
= fix
;
16481 minipool_fix_head
= fix
;
16483 minipool_fix_tail
= fix
;
16486 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16487 Returns the number of insns needed, or 99 if we always want to synthesize
16490 arm_max_const_double_inline_cost ()
16492 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16495 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16496 Returns the number of insns needed, or 99 if we don't know how to
16499 arm_const_double_inline_cost (rtx val)
16501 rtx lowpart, highpart;
16504 mode = GET_MODE (val);
16506 if (mode == VOIDmode)
16509 gcc_assert (GET_MODE_SIZE (mode) == 8);
16511 lowpart = gen_lowpart (SImode, val);
16512 highpart = gen_highpart_mode (SImode, mode, val);
16514 gcc_assert (CONST_INT_P (lowpart));
16515 gcc_assert (CONST_INT_P (highpart));
16517 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16518 NULL_RTX, NULL_RTX, 0, 0)
16519 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16520 NULL_RTX, NULL_RTX, 0, 0));
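/* Example (illustrative only): for the DImode constant 0x0000000100000001
   both 32-bit halves are the immediate 1, each costing a single insn, so the
   total of 2 is within arm_max_const_double_inline_cost () (3 or 4) and the
   constant is worth synthesizing inline rather than placing in the literal
   pool.  */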
16523 /* Cost of loading a SImode constant. */
16525 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16527 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16528 NULL_RTX
, NULL_RTX
, 1, 0);
16531 /* Return true if it is worthwhile to split a 64-bit constant into two
16532 32-bit operations. This is the case if optimizing for size, or
16533 if we have load delay slots, or if one 32-bit part can be done with
16534 a single data operation. */
16536 arm_const_double_by_parts (rtx val
)
16538 machine_mode mode
= GET_MODE (val
);
16541 if (optimize_size
|| arm_ld_sched
)
16544 if (mode
== VOIDmode
)
16547 part
= gen_highpart_mode (SImode
, mode
, val
);
16549 gcc_assert (CONST_INT_P (part
));
16551 if (const_ok_for_arm (INTVAL (part
))
16552 || const_ok_for_arm (~INTVAL (part
)))
16555 part
= gen_lowpart (SImode
, val
);
16557 gcc_assert (CONST_INT_P (part
));
16559 if (const_ok_for_arm (INTVAL (part
))
16560 || const_ok_for_arm (~INTVAL (part
)))
16566 /* Return true if it is possible to inline both the high and low parts
16567 of a 64-bit constant into 32-bit data processing instructions. */
16569 arm_const_double_by_immediates (rtx val
)
16571 machine_mode mode
= GET_MODE (val
);
16574 if (mode
== VOIDmode
)
16577 part
= gen_highpart_mode (SImode
, mode
, val
);
16579 gcc_assert (CONST_INT_P (part
));
16581 if (!const_ok_for_arm (INTVAL (part
)))
16584 part
= gen_lowpart (SImode
, val
);
16586 gcc_assert (CONST_INT_P (part
));
16588 if (!const_ok_for_arm (INTVAL (part
)))
16594 /* Scan INSN and note any of its operands that need fixing.
16595 If DO_PUSHES is false we do not actually push any of the fixups
16598 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16602 extract_constrain_insn (insn
);
16604 if (recog_data
.n_alternatives
== 0)
16607 /* Fill in recog_op_alt with information about the constraints of
16609 preprocess_constraints (insn
);
16611 const operand_alternative
*op_alt
= which_op_alt ();
16612 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16614 /* Things we need to fix can only occur in inputs. */
16615 if (recog_data
.operand_type
[opno
] != OP_IN
)
16618 /* If this alternative is a memory reference, then any mention
16619 of constants in this alternative is really to fool reload
16620 into allowing us to accept one there. We need to fix them up
16621 now so that we output the right code. */
16622 if (op_alt
[opno
].memory_ok
)
16624 rtx op
= recog_data
.operand
[opno
];
16626 if (CONSTANT_P (op
))
16629 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16630 recog_data
.operand_mode
[opno
], op
);
16632 else if (MEM_P (op
)
16633 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16634 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16638 rtx cop
= avoid_constant_pool_reference (op
);
16640 /* Casting the address of something to a mode narrower
16641 than a word can cause avoid_constant_pool_reference()
16642 to return the pool reference itself. That's no good to
16643 us here. Let's just hope that we can use the
16644 constant pool value directly. */
16646 cop
= get_pool_constant (XEXP (op
, 0));
16648 push_minipool_fix (insn
, address
,
16649 recog_data
.operand_loc
[opno
],
16650 recog_data
.operand_mode
[opno
], cop
);
16660 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16661 and unions in the context of ARMv8-M Security Extensions. It is used as a
16662 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16663 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16664 or four masks, depending on whether it is being computed for a
16665 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16666 respectively. The tree for the type of the argument or a field within an
16667 argument is passed in ARG_TYPE, the current register this argument or field
16668 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16669 argument or field starts at is passed in STARTING_BIT and the last used bit
16670 is kept in LAST_USED_BIT which is also updated accordingly. */
16672 static unsigned HOST_WIDE_INT
16673 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16674 uint32_t * padding_bits_to_clear,
16675 unsigned starting_bit, int * last_used_bit)
16678 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
16680 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
16682 unsigned current_bit
= starting_bit
;
16684 long int offset
, size
;
16687 field
= TYPE_FIELDS (arg_type
);
16690 /* The offset within a structure is always an offset from
16691 the start of that structure. Make sure we take that into the
16692 calculation of the register based offset that we use here. */
16693 offset
= starting_bit
;
16694 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
16697 /* This is the actual size of the field, for bitfields this is the
16698 bitfield width and not the container size. */
16699 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16701 if (*last_used_bit
!= offset
)
16703 if (offset
< *last_used_bit
)
16705 /* This field's offset is before the 'last_used_bit', that
16706 means this field goes on the next register. So we need to
16707 pad the rest of the current register and increase the
16708 register number. */
16710 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
16713 padding_bits_to_clear
[*regno
] |= mask
;
16714 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16719 /* Otherwise we pad the bits between the last field's end and
16720 the start of the new field. */
16723 mask
= ((uint32_t)-1) >> (32 - offset
);
16724 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
16725 padding_bits_to_clear
[*regno
] |= mask
;
16727 current_bit
= offset
;
16730 /* Calculate further padding bits for inner structs/unions too. */
16731 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
16733 *last_used_bit
= current_bit
;
16734 not_to_clear_reg_mask
16735 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
16736 padding_bits_to_clear
, offset
,
16741 /* Update 'current_bit' with this field's size. If the
16742 'current_bit' lies in a subsequent register, update 'regno' and
16743 reset 'current_bit' to point to the current bit in that new
16745 current_bit
+= size
;
16746 while (current_bit
>= 32)
16749 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16752 *last_used_bit
= current_bit
;
16755 field
= TREE_CHAIN (field
);
16757 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16759 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
16761 tree field
, field_t
;
16762 int i
, regno_t
, field_size
;
16766 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
16767 = {-1, -1, -1, -1};
16769 /* To compute the padding bits in a union we only consider bits as
16770 padding bits if they are always either a padding bit or fall outside a
16771 fields size for all fields in the union. */
16772 field
= TYPE_FIELDS (arg_type
);
16775 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
16776 = {0U, 0U, 0U, 0U};
16777 int last_used_bit_t
= *last_used_bit
;
16779 field_t
= TREE_TYPE (field
);
16781 /* If the field's type is either a record or a union make sure to
16782 compute their padding bits too. */
16783 if (RECORD_OR_UNION_TYPE_P (field_t
))
16784 not_to_clear_reg_mask
16785 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
16786 &padding_bits_to_clear_t
[0],
16787 starting_bit
, &last_used_bit_t
);
16790 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16791 regno_t
= (field_size
/ 32) + *regno
;
16792 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}
      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
/* In the context of ARMv8-M Security Extensions, this function is used for both
   'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
   registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */
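
/* A worked example (hypothetical, not taken from the sources, and assuming
   the usual AAPCS layout):  for an argument of type

	struct s { uint8_t a; uint16_t b; };

   'a' occupies bits 0-7 of r0, bits 8-15 are alignment padding and 'b'
   occupies bits 16-31.  The function below would therefore return a mask
   with only bit 0 set (r0 carries data) and set bits 8-15 in
   padding_bits_to_clear[0], telling the caller-clearing code which bits
   of r0 must still be zeroed before a non-secure call.  */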
static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)
{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT
		mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
/* Clears caller saved registers not used to pass arguments before a
   cmse_nonsecure_call.  Saving, clearing and restoring of callee saved
   registers is done in the __gnu_cmse_nonsecure_call libcall.
   See libgcc/config/arm/cmse_nonsecure_call.S.  */
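
/* Illustrative usage (hypothetical example, not from this file):  given

	typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (int);
	ns_fn_t *callee;
	...
	callee (1);

   the pass below recognizes the UNSPEC_NONSECURE_MEM call, works out
   which argument registers actually carry data (here only r0), and emits
   instructions just before the call that overwrite every other
   caller-saved core register (and, for -mfloat-abi=hard, the caller-saved
   VFP registers) so that no secure-world value can leak into the
   non-secure callee.  */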
16939 cmse_nonsecure_call_clear_caller_saved (void)
16943 FOR_EACH_BB_FN (bb
, cfun
)
16947 FOR_BB_INSNS (bb
, insn
)
16949 uint64_t to_clear_mask
, float_mask
;
16951 rtx pat
, call
, unspec
, reg
, cleared_reg
, tmp
;
16952 unsigned int regno
, maxregno
;
16954 CUMULATIVE_ARGS args_so_far_v
;
16955 cumulative_args_t args_so_far
;
16956 tree arg_type
, fntype
;
16957 bool using_r4
, first_param
= true;
16958 function_args_iterator args_iter
;
16959 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
16960 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
[0];
16962 if (!NONDEBUG_INSN_P (insn
))
16965 if (!CALL_P (insn
))
16968 pat
= PATTERN (insn
);
16969 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
16970 call
= XVECEXP (pat
, 0, 0);
16972 /* Get the real call RTX if the insn sets a value, ie. returns. */
16973 if (GET_CODE (call
) == SET
)
16974 call
= SET_SRC (call
);
16976 /* Check if it is a cmse_nonsecure_call. */
16977 unspec
= XEXP (call
, 0);
16978 if (GET_CODE (unspec
) != UNSPEC
16979 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
16982 /* Determine the caller-saved registers we need to clear. */
16983 to_clear_mask
= (1LL << (NUM_ARG_REGS
)) - 1;
16984 maxregno
= NUM_ARG_REGS
- 1;
	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
16989 if (TARGET_HARD_FLOAT_ABI
)
16991 float_mask
= (1LL << (D7_VFP_REGNUM
+ 1)) - 1;
16992 float_mask
&= ~((1LL << FIRST_VFP_REGNUM
) - 1);
16993 to_clear_mask
|= float_mask
;
16994 maxregno
= D7_VFP_REGNUM
;
16997 /* Make sure the register used to hold the function address is not
16999 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
17000 gcc_assert (MEM_P (address
));
17001 gcc_assert (REG_P (XEXP (address
, 0)));
17002 to_clear_mask
&= ~(1LL << REGNO (XEXP (address
, 0)));
17004 /* Set basic block of call insn so that df rescan is performed on
17005 insns inserted here. */
17006 set_block_for_insn (insn
, bb
);
17007 df_set_flags (DF_DEFER_INSN_RESCAN
);
17010 /* Make sure the scheduler doesn't schedule other insns beyond
17012 emit_insn (gen_blockage ());
17014 /* Walk through all arguments and clear registers appropriately.
17016 fntype
= TREE_TYPE (MEM_EXPR (address
));
17017 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
17019 args_so_far
= pack_cumulative_args (&args_so_far_v
);
17020 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
17023 machine_mode arg_mode
= TYPE_MODE (arg_type
);
17025 if (VOID_TYPE_P (arg_type
))
17029 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
,
17032 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
,
17034 gcc_assert (REG_P (arg_rtx
));
17036 &= ~compute_not_to_clear_mask (arg_type
, arg_rtx
,
17038 padding_bits_to_clear_ptr
);
17040 first_param
= false;
17043 /* Clear padding bits where needed. */
17044 cleared_reg
= XEXP (address
, 0);
17045 reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
17047 for (regno
= R0_REGNUM
; regno
< NUM_ARG_REGS
; regno
++)
17049 if (padding_bits_to_clear
[regno
] == 0)
17052 /* If this is a Thumb-1 target copy the address of the function
17053 we are calling from 'r4' into 'ip' such that we can use r4 to
17054 clear the unused bits in the arguments. */
17055 if (TARGET_THUMB1
&& !using_r4
)
17059 emit_move_insn (gen_rtx_REG (SImode
, IP_REGNUM
),
17063 tmp
= GEN_INT ((((~padding_bits_to_clear
[regno
]) << 16u) >> 16u));
17064 emit_move_insn (reg
, tmp
);
17065 /* Also fill the top half of the negated
17066 padding_bits_to_clear. */
17067 if (((~padding_bits_to_clear
[regno
]) >> 16) > 0)
17069 tmp
= GEN_INT ((~padding_bits_to_clear
[regno
]) >> 16);
17070 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg
,
17076 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, regno
),
17077 gen_rtx_REG (SImode
, regno
),
17082 emit_move_insn (cleared_reg
,
17083 gen_rtx_REG (SImode
, IP_REGNUM
));
17085 /* We use right shift and left shift to clear the LSB of the address
17086 we jump to instead of using bic, to avoid having to use an extra
17087 register on Thumb-1. */
17088 tmp
= gen_rtx_LSHIFTRT (SImode
, cleared_reg
, const1_rtx
);
17089 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
17090 tmp
= gen_rtx_ASHIFT (SImode
, cleared_reg
, const1_rtx
);
17091 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
17093 /* Clearing all registers that leak before doing a non-secure
17095 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
17097 if (!(to_clear_mask
& (1LL << regno
)))
17100 /* If regno is an even vfp register and its successor is also to
17101 be cleared, use vmov. */
17102 if (IS_VFP_REGNUM (regno
))
17104 if (TARGET_VFP_DOUBLE
17105 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
17106 && to_clear_mask
& (1LL << (regno
+ 1)))
17107 emit_move_insn (gen_rtx_REG (DFmode
, regno
++),
17108 CONST0_RTX (DFmode
));
17110 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
17111 CONST0_RTX (SFmode
));
17114 emit_move_insn (gen_rtx_REG (SImode
, regno
), cleared_reg
);
17117 seq
= get_insns ();
17119 emit_insn_before (seq
, insn
);
/* Rewrite a move insn into a subtract of 0 if the condition codes will
   be useful in the next conditional jump insn.  */
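
/* For instance (hypothetical input, shown only to illustrate the idea):
   in a Thumb-1 block ending in

	movs	r2, r1
	...
	cmp	r1, #0
	beq	.L1

   the move is rewritten as "subs r2, r1, #0", which sets the condition
   codes on the very value being tested, so the later cbranchsi4 can be
   output without a separate compare instruction.  */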
17129 thumb1_reorg (void)
17133 FOR_EACH_BB_FN (bb
, cfun
)
17136 rtx cmp
, op0
, op1
, set
= NULL
;
17137 rtx_insn
*prev
, *insn
= BB_END (bb
);
17138 bool insn_clobbered
= false;
17140 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17141 insn
= PREV_INSN (insn
);
17143 /* Find the last cbranchsi4_insn in basic block BB. */
17144 if (insn
== BB_HEAD (bb
)
17145 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17148 /* Get the register with which we are comparing. */
17149 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17150 op0
= XEXP (cmp
, 0);
17151 op1
= XEXP (cmp
, 1);
17153 /* Check that comparison is against ZERO. */
17154 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17157 /* Find the first flag setting insn before INSN in basic block BB. */
17158 gcc_assert (insn
!= BB_HEAD (bb
));
17159 for (prev
= PREV_INSN (insn
);
17161 && prev
!= BB_HEAD (bb
)
17163 || DEBUG_INSN_P (prev
)
17164 || ((set
= single_set (prev
)) != NULL
17165 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17166 prev
= PREV_INSN (prev
))
17168 if (reg_set_p (op0
, prev
))
17169 insn_clobbered
= true;
17172 /* Skip if op0 is clobbered by insn other than prev. */
17173 if (insn_clobbered
)
17179 dest
= SET_DEST (set
);
17180 src
= SET_SRC (set
);
17181 if (!low_register_operand (dest
, SImode
)
17182 || !low_register_operand (src
, SImode
))
17185 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17186 in INSN. Both src and dest of the move insn are checked. */
17187 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17189 dest
= copy_rtx (dest
);
17190 src
= copy_rtx (src
);
17191 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17192 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17193 INSN_CODE (prev
) = -1;
17194 /* Set test register in INSN to dest. */
17195 XEXP (cmp
, 0) = copy_rtx (dest
);
17196 INSN_CODE (insn
) = -1;
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */
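
/* For example (illustrative only): where the condition codes are known
   to be dead, a three-register "add r0, r1, r2" can be rewritten as the
   flag-setting "adds r0, r1, r2", which has a 16-bit Thumb-2 encoding,
   whereas the non-flag-setting form would need the 32-bit add.w
   encoding.  */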
17205 thumb2_reorg (void)
17210 INIT_REG_SET (&live
);
17212 /* We are freeing block_for_insn in the toplev to keep compatibility
17213 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17214 compute_bb_for_insn ();
17217 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17219 FOR_EACH_BB_FN (bb
, cfun
)
17221 if ((current_tune
->disparage_flag_setting_t16_encodings
17222 == tune_params::DISPARAGE_FLAGS_ALL
)
17223 && optimize_bb_for_speed_p (bb
))
17227 Convert_Action action
= SKIP
;
17228 Convert_Action action_for_partial_flag_setting
17229 = ((current_tune
->disparage_flag_setting_t16_encodings
17230 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17231 && optimize_bb_for_speed_p (bb
))
17234 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17235 df_simulate_initialize_backwards (bb
, &live
);
17236 FOR_BB_INSNS_REVERSE (bb
, insn
)
17238 if (NONJUMP_INSN_P (insn
)
17239 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17240 && GET_CODE (PATTERN (insn
)) == SET
)
17243 rtx pat
= PATTERN (insn
);
17244 rtx dst
= XEXP (pat
, 0);
17245 rtx src
= XEXP (pat
, 1);
17246 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17248 if (UNARY_P (src
) || BINARY_P (src
))
17249 op0
= XEXP (src
, 0);
17251 if (BINARY_P (src
))
17252 op1
= XEXP (src
, 1);
17254 if (low_register_operand (dst
, SImode
))
17256 switch (GET_CODE (src
))
17259 /* Adding two registers and storing the result
17260 in the first source is already a 16-bit
17262 if (rtx_equal_p (dst
, op0
)
17263 && register_operand (op1
, SImode
))
17266 if (low_register_operand (op0
, SImode
))
17268 /* ADDS <Rd>,<Rn>,<Rm> */
17269 if (low_register_operand (op1
, SImode
))
17271 /* ADDS <Rdn>,#<imm8> */
17272 /* SUBS <Rdn>,#<imm8> */
17273 else if (rtx_equal_p (dst
, op0
)
17274 && CONST_INT_P (op1
)
17275 && IN_RANGE (INTVAL (op1
), -255, 255))
17277 /* ADDS <Rd>,<Rn>,#<imm3> */
17278 /* SUBS <Rd>,<Rn>,#<imm3> */
17279 else if (CONST_INT_P (op1
)
17280 && IN_RANGE (INTVAL (op1
), -7, 7))
17283 /* ADCS <Rd>, <Rn> */
17284 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17285 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17286 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17288 && COMPARISON_P (op1
)
17289 && cc_register (XEXP (op1
, 0), VOIDmode
)
17290 && maybe_get_arm_condition_code (op1
) == ARM_CS
17291 && XEXP (op1
, 1) == const0_rtx
)
17296 /* RSBS <Rd>,<Rn>,#0
17297 Not handled here: see NEG below. */
17298 /* SUBS <Rd>,<Rn>,#<imm3>
17300 Not handled here: see PLUS above. */
17301 /* SUBS <Rd>,<Rn>,<Rm> */
17302 if (low_register_operand (op0
, SImode
)
17303 && low_register_operand (op1
, SImode
))
17308 /* MULS <Rdm>,<Rn>,<Rdm>
17309 As an exception to the rule, this is only used
17310 when optimizing for size since MULS is slow on all
17311 known implementations. We do not even want to use
17312 MULS in cold code, if optimizing for speed, so we
17313 test the global flag here. */
17314 if (!optimize_size
)
17316 /* Fall through. */
17320 /* ANDS <Rdn>,<Rm> */
17321 if (rtx_equal_p (dst
, op0
)
17322 && low_register_operand (op1
, SImode
))
17323 action
= action_for_partial_flag_setting
;
17324 else if (rtx_equal_p (dst
, op1
)
17325 && low_register_operand (op0
, SImode
))
17326 action
= action_for_partial_flag_setting
== SKIP
17327 ? SKIP
: SWAP_CONV
;
17333 /* ASRS <Rdn>,<Rm> */
17334 /* LSRS <Rdn>,<Rm> */
17335 /* LSLS <Rdn>,<Rm> */
17336 if (rtx_equal_p (dst
, op0
)
17337 && low_register_operand (op1
, SImode
))
17338 action
= action_for_partial_flag_setting
;
17339 /* ASRS <Rd>,<Rm>,#<imm5> */
17340 /* LSRS <Rd>,<Rm>,#<imm5> */
17341 /* LSLS <Rd>,<Rm>,#<imm5> */
17342 else if (low_register_operand (op0
, SImode
)
17343 && CONST_INT_P (op1
)
17344 && IN_RANGE (INTVAL (op1
), 0, 31))
17345 action
= action_for_partial_flag_setting
;
17349 /* RORS <Rdn>,<Rm> */
17350 if (rtx_equal_p (dst
, op0
)
17351 && low_register_operand (op1
, SImode
))
17352 action
= action_for_partial_flag_setting
;
17356 /* MVNS <Rd>,<Rm> */
17357 if (low_register_operand (op0
, SImode
))
17358 action
= action_for_partial_flag_setting
;
17362 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17363 if (low_register_operand (op0
, SImode
))
17368 /* MOVS <Rd>,#<imm8> */
17369 if (CONST_INT_P (src
)
17370 && IN_RANGE (INTVAL (src
), 0, 255))
17371 action
= action_for_partial_flag_setting
;
17375 /* MOVS and MOV<c> with registers have different
17376 encodings, so are not relevant here. */
17384 if (action
!= SKIP
)
17386 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17387 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17390 if (action
== SWAP_CONV
)
17392 src
= copy_rtx (src
);
17393 XEXP (src
, 0) = op1
;
17394 XEXP (src
, 1) = op0
;
17395 pat
= gen_rtx_SET (dst
, src
);
17396 vec
= gen_rtvec (2, pat
, clobber
);
17398 else /* action == CONV */
17399 vec
= gen_rtvec (2, pat
, clobber
);
17401 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17402 INSN_CODE (insn
) = -1;
17406 if (NONDEBUG_INSN_P (insn
))
17407 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17411 CLEAR_REG_SET (&live
);
/* GCC puts the constant pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some special
   munging to move the constant pool values to the correct point in the
   code.  */
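
/* A simplified illustration (hypothetical code, real output differs in
   detail): a constant that cannot be built with immediates is loaded
   pc-relative from a "minipool" placed near its use,

	ldr	r0, .LCP0	@ pc-relative load, limited offset range
	...
	b	.Lskip		@ jump around the pool if it falls mid-code
   .LCP0:
	.word	0x87654321
   .Lskip:

   The code below records every such fixup and then chooses pool
   placements so that each pc-relative reference stays in range.  */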
17422 HOST_WIDE_INT address
= 0;
17426 cmse_nonsecure_call_clear_caller_saved ();
17429 else if (TARGET_THUMB2
)
17432 /* Ensure all insns that must be split have been split at this point.
17433 Otherwise, the pool placement code below may compute incorrect
17434 insn lengths. Note that when optimizing, all insns have already
17435 been split at this point. */
17437 split_all_insns_noflow ();
17439 /* Make sure we do not attempt to create a literal pool even though it should
17440 no longer be necessary to create any. */
17441 if (arm_disable_literal_pool
)
17444 minipool_fix_head
= minipool_fix_tail
= NULL
;
17446 /* The first insn must always be a note, or the code below won't
17447 scan it properly. */
17448 insn
= get_insns ();
17449 gcc_assert (NOTE_P (insn
));
17452 /* Scan all the insns and record the operands that will need fixing. */
17453 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17455 if (BARRIER_P (insn
))
17456 push_minipool_barrier (insn
, address
);
17457 else if (INSN_P (insn
))
17459 rtx_jump_table_data
*table
;
17461 note_invalid_constants (insn
, address
, true);
17462 address
+= get_attr_length (insn
);
17464 /* If the insn is a vector jump, add the size of the table
17465 and skip the table. */
17466 if (tablejump_p (insn
, NULL
, &table
))
17468 address
+= get_jump_table_size (table
);
17472 else if (LABEL_P (insn
))
17473 /* Add the worst-case padding due to alignment. We don't add
17474 the _current_ padding because the minipool insertions
17475 themselves might change it. */
17476 address
+= get_label_padding (insn
);
17479 fix
= minipool_fix_head
;
17481 /* Now scan the fixups and perform the required changes. */
17486 Mfix
* last_added_fix
;
17487 Mfix
* last_barrier
= NULL
;
17490 /* Skip any further barriers before the next fix. */
17491 while (fix
&& BARRIER_P (fix
->insn
))
17494 /* No more fixes. */
17498 last_added_fix
= NULL
;
17500 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17502 if (BARRIER_P (ftmp
->insn
))
17504 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17507 last_barrier
= ftmp
;
17509 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17512 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17515 /* If we found a barrier, drop back to that; any fixes that we
17516 could have reached but come after the barrier will now go in
17517 the next mini-pool. */
17518 if (last_barrier
!= NULL
)
17520 /* Reduce the refcount for those fixes that won't go into this
17522 for (fdel
= last_barrier
->next
;
17523 fdel
&& fdel
!= ftmp
;
17526 fdel
->minipool
->refcount
--;
17527 fdel
->minipool
= NULL
;
17530 ftmp
= last_barrier
;
	  /* ftmp is the first fix that we can't fit into this pool and
	     there are no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
17538 HOST_WIDE_INT max_address
;
17540 /* The last item on the list of fixes must be a barrier, so
17541 we can never run off the end of the list of fixes without
17542 last_barrier being set. */
17545 max_address
= minipool_vector_head
->max_address
;
17546 /* Check that there isn't another fix that is in range that
17547 we couldn't fit into this pool because the pool was
17548 already too large: we need to put the pool before such an
17549 instruction. The pool itself may come just after the
17550 fix because create_fix_barrier also allows space for a
17551 jump instruction. */
17552 if (ftmp
->address
< max_address
)
17553 max_address
= ftmp
->address
+ 1;
17555 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17558 assign_minipool_offsets (last_barrier
);
17562 if (!BARRIER_P (ftmp
->insn
)
17563 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17570 /* Scan over the fixes we have identified for this pool, fixing them
17571 up and adding the constants to the pool itself. */
17572 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17573 this_fix
= this_fix
->next
)
17574 if (!BARRIER_P (this_fix
->insn
))
17577 = plus_constant (Pmode
,
17578 gen_rtx_LABEL_REF (VOIDmode
,
17579 minipool_vector_label
),
17580 this_fix
->minipool
->offset
);
17581 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17584 dump_minipool (last_barrier
->insn
);
17588 /* From now on we must synthesize any constants that we can't handle
17589 directly. This can happen if the RTL gets split during final
17590 instruction generation. */
17591 cfun
->machine
->after_arm_reorg
= 1;
17593 /* Free the minipool memory. */
17594 obstack_free (&minipool_obstack
, minipool_startobj
);
17597 /* Routines to output assembly language. */
17599 /* Return string representation of passed in real value. */
17600 static const char *
17601 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17603 if (!fp_consts_inited
)
17606 gcc_assert (real_equal (r
, &value_fp0
));
/* OPERANDS[0] is the entire list of insns that constitute the pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
   is in the list, UPDATE is true iff the list contains an explicit
   update of the base register.  */
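
/* For example (illustrative only): popping r4, r5 and the return address
   with a base-register update on SP is printed as "pop {r4, r5, pc}",
   while the same pop in an interrupt handler that returns is printed as
   "ldmfd sp!, {r4, r5, pc}^" so that the SPSR is restored as well.  */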
17615 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17621 const char *conditional
;
17622 int num_saves
= XVECLEN (operands
[0], 0);
17623 unsigned int regno
;
17624 unsigned int regno_base
= REGNO (operands
[1]);
17625 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
17628 offset
+= update
? 1 : 0;
17629 offset
+= return_pc
? 1 : 0;
17631 /* Is the base register in the list? */
17632 for (i
= offset
; i
< num_saves
; i
++)
17634 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17635 /* If SP is in the list, then the base register must be SP. */
17636 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17637 /* If base register is in the list, there must be no explicit update. */
17638 if (regno
== regno_base
)
17639 gcc_assert (!update
);
17642 conditional
= reverse
? "%?%D0" : "%?%d0";
17643 /* Can't use POP if returning from an interrupt. */
17644 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17645 sprintf (pattern
, "pop%s\t{", conditional
);
17648 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17649 It's just a convention, their semantics are identical. */
17650 if (regno_base
== SP_REGNUM
)
17651 sprintf (pattern
, "ldmfd%s\t", conditional
);
17653 sprintf (pattern
, "ldmia%s\t", conditional
);
17655 sprintf (pattern
, "ldm%s\t", conditional
);
17657 strcat (pattern
, reg_names
[regno_base
]);
17659 strcat (pattern
, "!, {");
17661 strcat (pattern
, ", {");
17664 /* Output the first destination register. */
17666 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17668 /* Output the rest of the destination registers. */
17669 for (i
= offset
+ 1; i
< num_saves
; i
++)
17671 strcat (pattern
, ", ");
17673 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17676 strcat (pattern
, "}");
17678 if (interrupt_p
&& return_pc
)
17679 strcat (pattern
, "^");
17681 output_asm_insn (pattern
, &cond
);
17685 /* Output the assembly for a store multiple. */
17688 vfp_output_vstmd (rtx
* operands
)
17694 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17695 ? XEXP (operands
[0], 0)
17696 : XEXP (XEXP (operands
[0], 0), 0);
17697 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17700 strcpy (pattern
, "vpush%?.64\t{%P1");
17702 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17704 p
= strlen (pattern
);
17706 gcc_assert (REG_P (operands
[1]));
17708 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17709 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17711 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17713 strcpy (&pattern
[p
], "}");
17715 output_asm_insn (pattern
, operands
);
/* Emit RTL to save a block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */
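
/* For example (illustrative only): saving the two register pairs d8 and
   d9 emits one push-multiple of 16 bytes, attaches a
   REG_FRAME_RELATED_EXPR note describing the equivalent individual
   stores for the unwinder, and returns 16; on pre-v6 ARM10 VFPr1 cores
   an extra pair is pushed (and counted) to avoid the two-pair
   store-multiple erratum described below.  */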
17724 vfp_emit_fstmd (int base_reg
, int count
)
17731 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17732 register pairs are stored by a store multiple insn. We avoid this
17733 by pushing an extra pair. */
17734 if (count
== 2 && !arm_arch6
)
17736 if (base_reg
== LAST_VFP_REGNUM
- 3)
17741 /* FSTMD may not store more than 16 doubleword registers at once. Split
17742 larger stores into multiple parts (up to a maximum of two, in
17747 /* NOTE: base_reg is an internal register number, so each D register
17749 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17750 saved
+= vfp_emit_fstmd (base_reg
, 16);
17754 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17755 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17757 reg
= gen_rtx_REG (DFmode
, base_reg
);
17760 XVECEXP (par
, 0, 0)
17761 = gen_rtx_SET (gen_frame_mem
17763 gen_rtx_PRE_MODIFY (Pmode
,
17766 (Pmode
, stack_pointer_rtx
,
17769 gen_rtx_UNSPEC (BLKmode
,
17770 gen_rtvec (1, reg
),
17771 UNSPEC_PUSH_MULT
));
17773 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17774 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17775 RTX_FRAME_RELATED_P (tmp
) = 1;
17776 XVECEXP (dwarf
, 0, 0) = tmp
;
17778 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17779 RTX_FRAME_RELATED_P (tmp
) = 1;
17780 XVECEXP (dwarf
, 0, 1) = tmp
;
17782 for (i
= 1; i
< count
; i
++)
17784 reg
= gen_rtx_REG (DFmode
, base_reg
);
17786 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17788 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17789 plus_constant (Pmode
,
17793 RTX_FRAME_RELATED_P (tmp
) = 1;
17794 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17797 par
= emit_insn (par
);
17798 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17799 RTX_FRAME_RELATED_P (par
) = 1;
/* Return true if -mcmse has been passed and the function pointed to by 'addr'
   has the cmse_nonsecure_call attribute, false otherwise.  */
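
/* Typical use (hypothetical example): for an indirect call through

	void __attribute__ ((cmse_nonsecure_call)) (*ns_fun) (void);

   the attribute lives on the function type reached through 'addr', so
   this predicate returns true under -mcmse and the call is lowered via
   the non-secure call sequence; a plain function pointer yields false.  */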
17808 detect_cmse_nonsecure_call (tree addr
)
17813 tree fntype
= TREE_TYPE (addr
);
17814 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
17815 TYPE_ATTRIBUTES (fntype
)))
17821 /* Emit a call instruction with pattern PAT. ADDR is the address of
17822 the call target. */
17825 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17829 insn
= emit_call_insn (pat
);
17831 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17832 If the call might use such an entry, add a use of the PIC register
17833 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17834 if (TARGET_VXWORKS_RTP
17837 && GET_CODE (addr
) == SYMBOL_REF
17838 && (SYMBOL_REF_DECL (addr
)
17839 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17840 : !SYMBOL_REF_LOCAL_P (addr
)))
17842 require_pic_register ();
17843 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17846 if (TARGET_AAPCS_BASED
)
17848 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17849 linker. We need to add an IP clobber to allow setting
17850 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17851 is not needed since it's a fixed register. */
17852 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17853 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17857 /* Output a 'call' insn. */
17859 output_call (rtx
*operands
)
17861 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17863 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17864 if (REGNO (operands
[0]) == LR_REGNUM
)
17866 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17867 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17870 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17872 if (TARGET_INTERWORK
|| arm_arch4t
)
17873 output_asm_insn ("bx%?\t%0", operands
);
17875 output_asm_insn ("mov%?\t%|pc, %0", operands
);
/* Output a move from arm registers to arm registers of a long double.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
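
/* E.g. (illustrative): moving {r1,r2,r3} into {r2,r3,r4} must copy the
   highest word first ("mov r4, r3; mov r3, r2; mov r2, r1") so that no
   source register is overwritten before it has been read; the copy
   direction below is chosen from the two register numbers for exactly
   that reason.  */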
17884 output_mov_long_double_arm_from_arm (rtx
*operands
)
17886 /* We have to be careful here because the two might overlap. */
17887 int dest_start
= REGNO (operands
[0]);
17888 int src_start
= REGNO (operands
[1]);
17892 if (dest_start
< src_start
)
17894 for (i
= 0; i
< 3; i
++)
17896 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17897 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17898 output_asm_insn ("mov%?\t%0, %1", ops
);
17903 for (i
= 2; i
>= 0; i
--)
17905 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17906 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17907 output_asm_insn ("mov%?\t%0, %1", ops
);
17915 arm_emit_movpair (rtx dest
, rtx src
)
17917 /* If the src is an immediate, simplify it. */
17918 if (CONST_INT_P (src
))
17920 HOST_WIDE_INT val
= INTVAL (src
);
17921 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17922 if ((val
>> 16) & 0x0000ffff)
17924 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17926 GEN_INT ((val
>> 16) & 0x0000ffff));
17927 rtx_insn
*insn
= get_last_insn ();
17928 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17932 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17933 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17934 rtx_insn
*insn
= get_last_insn ();
17935 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17938 /* Output a move between double words. It must be REG<-MEM
17941 output_move_double (rtx
*operands
, bool emit
, int *count
)
17943 enum rtx_code code0
= GET_CODE (operands
[0]);
17944 enum rtx_code code1
= GET_CODE (operands
[1]);
17949 /* The only case when this might happen is when
17950 you are looking at the length of a DImode instruction
17951 that has an invalid constant in it. */
17952 if (code0
== REG
&& code1
!= MEM
)
17954 gcc_assert (!emit
);
17961 unsigned int reg0
= REGNO (operands
[0]);
17963 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17965 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17967 switch (GET_CODE (XEXP (operands
[1], 0)))
17974 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17975 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
17977 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
17982 gcc_assert (TARGET_LDRD
);
17984 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
17991 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
17993 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18001 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18003 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18008 gcc_assert (TARGET_LDRD
);
18010 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
	  /* Auto-increment addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
18019 otherops
[0] = operands
[0];
18020 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18021 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18023 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18025 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18027 /* Registers overlap so split out the increment. */
18030 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18031 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18038 /* Use a single insn if we can.
18039 FIXME: IWMMXT allows offsets larger than ldrd can
18040 handle, fix these up with a pair of ldr. */
18042 || !CONST_INT_P (otherops
[2])
18043 || (INTVAL (otherops
[2]) > -256
18044 && INTVAL (otherops
[2]) < 256))
18047 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18053 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18054 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18064 /* Use a single insn if we can.
18065 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18066 fix these up with a pair of ldr. */
18068 || !CONST_INT_P (otherops
[2])
18069 || (INTVAL (otherops
[2]) > -256
18070 && INTVAL (otherops
[2]) < 256))
18073 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18079 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18080 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18090 /* We might be able to use ldrd %0, %1 here. However the range is
18091 different to ldr/adr, and it is broken on some ARMv7-M
18092 implementations. */
18093 /* Use the second register of the pair to avoid problematic
18095 otherops
[1] = operands
[1];
18097 output_asm_insn ("adr%?\t%0, %1", otherops
);
18098 operands
[1] = otherops
[0];
18102 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18104 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18111 /* ??? This needs checking for thumb2. */
18113 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18114 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18116 otherops
[0] = operands
[0];
18117 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18118 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18120 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18122 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18124 switch ((int) INTVAL (otherops
[2]))
18128 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18134 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18140 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18144 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18145 operands
[1] = otherops
[0];
18147 && (REG_P (otherops
[2])
18149 || (CONST_INT_P (otherops
[2])
18150 && INTVAL (otherops
[2]) > -256
18151 && INTVAL (otherops
[2]) < 256)))
18153 if (reg_overlap_mentioned_p (operands
[0],
18156 /* Swap base and index registers over to
18157 avoid a conflict. */
18158 std::swap (otherops
[1], otherops
[2]);
18160 /* If both registers conflict, it will usually
18161 have been fixed by a splitter. */
18162 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18163 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18167 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18168 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18175 otherops
[0] = operands
[0];
18177 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18182 if (CONST_INT_P (otherops
[2]))
18186 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18187 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18189 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18195 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18201 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18208 return "ldrd%?\t%0, [%1]";
18210 return "ldmia%?\t%1, %M0";
18214 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18215 /* Take care of overlapping base/data reg. */
18216 if (reg_mentioned_p (operands
[0], operands
[1]))
18220 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18221 output_asm_insn ("ldr%?\t%0, %1", operands
);
18231 output_asm_insn ("ldr%?\t%0, %1", operands
);
18232 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18242 /* Constraints should ensure this. */
18243 gcc_assert (code0
== MEM
&& code1
== REG
);
18244 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18245 || (TARGET_ARM
&& TARGET_LDRD
));
18247 switch (GET_CODE (XEXP (operands
[0], 0)))
18253 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18255 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18260 gcc_assert (TARGET_LDRD
);
18262 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18269 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18271 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18279 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18281 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18286 gcc_assert (TARGET_LDRD
);
18288 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18293 otherops
[0] = operands
[1];
18294 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18295 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18297 /* IWMMXT allows offsets larger than ldrd can handle,
18298 fix these up with a pair of ldr. */
18300 && CONST_INT_P (otherops
[2])
18301 && (INTVAL(otherops
[2]) <= -256
18302 || INTVAL(otherops
[2]) >= 256))
18304 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18308 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18309 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18318 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18319 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18325 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18328 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18333 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18338 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18339 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18341 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18345 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18352 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18359 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18364 && (REG_P (otherops
[2])
18366 || (CONST_INT_P (otherops
[2])
18367 && INTVAL (otherops
[2]) > -256
18368 && INTVAL (otherops
[2]) < 256)))
18370 otherops
[0] = operands
[1];
18371 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18373 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18379 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18380 otherops
[1] = operands
[1];
18383 output_asm_insn ("str%?\t%1, %0", operands
);
18384 output_asm_insn ("str%?\t%H1, %0", otherops
);
18394 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18395 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18398 output_move_quad (rtx
*operands
)
18400 if (REG_P (operands
[0]))
18402 /* Load, or reg->reg move. */
18404 if (MEM_P (operands
[1]))
18406 switch (GET_CODE (XEXP (operands
[1], 0)))
18409 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18414 output_asm_insn ("adr%?\t%0, %1", operands
);
18415 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18419 gcc_unreachable ();
18427 gcc_assert (REG_P (operands
[1]));
18429 dest
= REGNO (operands
[0]);
18430 src
= REGNO (operands
[1]);
18432 /* This seems pretty dumb, but hopefully GCC won't try to do it
18435 for (i
= 0; i
< 4; i
++)
18437 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18438 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18439 output_asm_insn ("mov%?\t%0, %1", ops
);
18442 for (i
= 3; i
>= 0; i
--)
18444 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18445 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18446 output_asm_insn ("mov%?\t%0, %1", ops
);
18452 gcc_assert (MEM_P (operands
[0]));
18453 gcc_assert (REG_P (operands
[1]));
18454 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18456 switch (GET_CODE (XEXP (operands
[0], 0)))
18459 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18463 gcc_unreachable ();
18470 /* Output a VFP load or store instruction. */
18473 output_move_vfp (rtx
*operands
)
18475 rtx reg
, mem
, addr
, ops
[2];
18476 int load
= REG_P (operands
[0]);
18477 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18478 int sp
= (!TARGET_VFP_FP16INST
18479 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18480 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18485 reg
= operands
[!load
];
18486 mem
= operands
[load
];
18488 mode
= GET_MODE (reg
);
18490 gcc_assert (REG_P (reg
));
18491 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18492 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18498 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18499 gcc_assert (MEM_P (mem
));
18501 addr
= XEXP (mem
, 0);
18503 switch (GET_CODE (addr
))
18506 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18507 ops
[0] = XEXP (addr
, 0);
18512 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18513 ops
[0] = XEXP (addr
, 0);
18518 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18524 sprintf (buff
, templ
,
18525 load
? "ld" : "st",
18526 dp
? "64" : sp
? "32" : "16",
18528 integer_p
? "\t%@ int" : "");
18529 output_asm_insn (buff
, ops
);
/* Output a Neon double-word or quad-word load or store, or a load
   or store for larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because the EABI requires that vectors stored in memory appear
   as though they were stored by a VSTM instruction.
   GCC RTL defines element ordering based on in-memory order.
   This can be different from the architectural ordering of elements
   within a NEON register.  The intrinsics defined in arm_neon.h use the
   NEON register element ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.

   Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
   possible, which allows use of appropriate alignment tags.
   Note that the choice of "64" is independent of the actual vector
   element size; this size simply ensures that the behavior is
   equivalent to VSTM/VLDM in both little-endian and big-endian mode.

   Due to limitations of those instructions, use of VST1.64/VLD1.64
   is not possible if:
    - the address contains PRE_DEC, or
    - the mode refers to more than 4 double-word registers

   In those cases, it would be possible to replace VSTM/VLDM by a
   sequence of instructions; this is not currently implemented since
   this is not certain to actually improve performance.  */
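
/* One concrete benefit (illustrative): when the address is known to be
   suitably aligned, the VST1 form can carry an alignment hint, e.g.
   "vst1.64 {d0, d1}, [r0:128]", which VSTM has no way of expressing;
   this is the main reason VST1.64/VLD1.64 is preferred when usable.  */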
18575 output_move_neon (rtx
*operands
)
18577 rtx reg
, mem
, addr
, ops
[2];
18578 int regno
, nregs
, load
= REG_P (operands
[0]);
18583 reg
= operands
[!load
];
18584 mem
= operands
[load
];
18586 mode
= GET_MODE (reg
);
18588 gcc_assert (REG_P (reg
));
18589 regno
= REGNO (reg
);
18590 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18591 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18592 || NEON_REGNO_OK_FOR_QUAD (regno
));
18593 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18594 || VALID_NEON_QREG_MODE (mode
)
18595 || VALID_NEON_STRUCT_MODE (mode
));
18596 gcc_assert (MEM_P (mem
));
18598 addr
= XEXP (mem
, 0);
18600 /* Strip off const from addresses like (const (plus (...))). */
18601 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18602 addr
= XEXP (addr
, 0);
18604 switch (GET_CODE (addr
))
18607 /* We have to use vldm / vstm for too-large modes. */
18610 templ
= "v%smia%%?\t%%0!, %%h1";
18611 ops
[0] = XEXP (addr
, 0);
18615 templ
= "v%s1.64\t%%h1, %%A0";
18622 /* We have to use vldm / vstm in this case, since there is no
18623 pre-decrement form of the vld1 / vst1 instructions. */
18624 templ
= "v%smdb%%?\t%%0!, %%h1";
18625 ops
[0] = XEXP (addr
, 0);
18630 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18631 gcc_unreachable ();
18634 /* We have to use vldm / vstm for too-large modes. */
18638 templ
= "v%smia%%?\t%%m0, %%h1";
18640 templ
= "v%s1.64\t%%h1, %%A0";
18646 /* Fall through. */
18652 for (i
= 0; i
< nregs
; i
++)
18654 /* We're only using DImode here because it's a convenient size. */
18655 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18656 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18657 if (reg_overlap_mentioned_p (ops
[0], mem
))
18659 gcc_assert (overlap
== -1);
18664 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18665 output_asm_insn (buff
, ops
);
18670 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18671 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18672 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18673 output_asm_insn (buff
, ops
);
18680 gcc_unreachable ();
18683 sprintf (buff
, templ
, load
? "ld" : "st");
18684 output_asm_insn (buff
, ops
);
18689 /* Compute and return the length of neon_mov<mode>, where <mode> is
18690 one of VSTRUCT modes: EI, OI, CI or XI. */
18692 arm_attr_length_move_neon (rtx_insn
*insn
)
18694 rtx reg
, mem
, addr
;
18698 extract_insn_cached (insn
);
18700 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18702 mode
= GET_MODE (recog_data
.operand
[0]);
18713 gcc_unreachable ();
18717 load
= REG_P (recog_data
.operand
[0]);
18718 reg
= recog_data
.operand
[!load
];
18719 mem
= recog_data
.operand
[load
];
18721 gcc_assert (MEM_P (mem
));
18723 mode
= GET_MODE (reg
);
18724 addr
= XEXP (mem
, 0);
18726 /* Strip off const from addresses like (const (plus (...))). */
18727 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18728 addr
= XEXP (addr
, 0);
18730 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18732 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18739 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18743 arm_address_offset_is_imm (rtx_insn
*insn
)
18747 extract_insn_cached (insn
);
18749 if (REG_P (recog_data
.operand
[0]))
18752 mem
= recog_data
.operand
[0];
18754 gcc_assert (MEM_P (mem
));
18756 addr
= XEXP (mem
, 0);
18759 || (GET_CODE (addr
) == PLUS
18760 && REG_P (XEXP (addr
, 0))
18761 && CONST_INT_P (XEXP (addr
, 1))))
18767 /* Output an ADD r, s, #n where n may be too big for one instruction.
18768 If adding zero to one register, output nothing. */
18770 output_add_immediate (rtx
*operands
)
18772 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18774 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18777 output_multi_immediate (operands
,
18778 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18781 output_multi_immediate (operands
,
18782 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
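
/* A worked example (hypothetical operands): adding 0x10003 to r1 cannot
   be encoded as a single ARM immediate, so the constant is split into
   8-bit chunks at even bit positions and emitted as

	add	r0, r1, #3
	add	r0, r0, #65536	@ 0x10000

   i.e. INSTR1 for the first chunk and INSTR2 for each later one.  */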
18795 static const char *
18796 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18797 int immed_op
, HOST_WIDE_INT n
)
18799 #if HOST_BITS_PER_WIDE_INT > 32
18805 /* Quick and easy output. */
18806 operands
[immed_op
] = const0_rtx
;
18807 output_asm_insn (instr1
, operands
);
18812 const char * instr
= instr1
;
18814 /* Note that n is never zero here (which would give no output). */
18815 for (i
= 0; i
< 32; i
+= 2)
18819 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18820 output_asm_insn (instr
, operands
);
18830 /* Return the name of a shifter operation. */
18831 static const char *
18832 arm_shift_nmem(enum rtx_code code
)
18837 return ARM_LSL_NAME
;
18853 /* Return the appropriate ARM instruction for the operation code.
18854 The returned result should not be overwritten. OP is the rtx of the
18855 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18858 arithmetic_instr (rtx op
, int shift_first_arg
)
18860 switch (GET_CODE (op
))
18866 return shift_first_arg
? "rsb" : "sub";
18881 return arm_shift_nmem(GET_CODE(op
));
18884 gcc_unreachable ();
18888 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18889 for the operation code. The returned result should not be overwritten.
18890 OP is the rtx code of the shift.
18891 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18893 static const char *
18894 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18897 enum rtx_code code
= GET_CODE (op
);
18902 if (!CONST_INT_P (XEXP (op
, 1)))
18904 output_operand_lossage ("invalid shift operand");
18909 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18917 mnem
= arm_shift_nmem(code
);
18918 if (CONST_INT_P (XEXP (op
, 1)))
18920 *amountp
= INTVAL (XEXP (op
, 1));
18922 else if (REG_P (XEXP (op
, 1)))
18929 output_operand_lossage ("invalid shift operand");
18935 /* We never have to worry about the amount being other than a
18936 power of 2, since this case can never be reloaded from a reg. */
18937 if (!CONST_INT_P (XEXP (op
, 1)))
18939 output_operand_lossage ("invalid shift operand");
18943 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18945 /* Amount must be a power of two. */
18946 if (*amountp
& (*amountp
- 1))
18948 output_operand_lossage ("invalid shift operand");
18952 *amountp
= exact_log2 (*amountp
);
18953 gcc_assert (IN_RANGE (*amountp
, 0, 31));
18954 return ARM_LSL_NAME
;
18957 output_operand_lossage ("invalid shift operand");
18961 /* This is not 100% correct, but follows from the desire to merge
18962 multiplication by a power of 2 with the recognizer for a
18963 shift. >=32 is not a valid shift for "lsl", so we must try and
18964 output a shift that produces the correct arithmetical result.
18965 Using lsr #32 is identical except for the fact that the carry bit
18966 is not set correctly if we set the flags; but we never use the
18967 carry bit from such an operation, so we can ignore that. */
18968 if (code
== ROTATERT
)
18969 /* Rotate is just modulo 32. */
18971 else if (*amountp
!= (*amountp
& 31))
18973 if (code
== ASHIFT
)
18978 /* Shifts of 0 are no-ops. */
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */
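
/* For example (hypothetical input): the three characters  a " \n  are
   emitted as

	.ascii	"a\"\012"

   with the quote backslash-escaped and the newline printed as an octal
   escape; a fresh ".ascii" directive is started whenever MAX_ASCII_LEN
   output characters have accumulated, to stay within the assembler's
   line-length limits.  */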
18993 #define MAX_ASCII_LEN 51
18996 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18999 int len_so_far
= 0;
19001 fputs ("\t.ascii\t\"", stream
);
19003 for (i
= 0; i
< len
; i
++)
19007 if (len_so_far
>= MAX_ASCII_LEN
)
19009 fputs ("\"\n\t.ascii\t\"", stream
);
19015 if (c
== '\\' || c
== '\"')
19017 putc ('\\', stream
);
19025 fprintf (stream
, "\\%03o", c
);
19030 fputs ("\"\n", stream
);
19033 /* Whether a register is callee saved or not. This is necessary because high
19034 registers are marked as caller saved when optimizing for size on Thumb-1
19035 targets despite being callee saved in order to avoid using them. */
19036 #define callee_saved_reg_p(reg) \
19037 (!call_used_regs[reg] \
19038 || (TARGET_THUMB1 && optimize_size \
19039 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19041 /* Compute the register save mask for registers 0 through 12
19042 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19044 static unsigned long
19045 arm_compute_save_reg0_reg12_mask (void)
19047 unsigned long func_type
= arm_current_func_type ();
19048 unsigned long save_reg_mask
= 0;
19051 if (IS_INTERRUPT (func_type
))
19053 unsigned int max_reg
;
19054 /* Interrupt functions must not corrupt any registers,
19055 even call clobbered ones. If this is a leaf function
19056 we can just examine the registers used by the RTL, but
19057 otherwise we have to assume that whatever function is
19058 called might clobber anything, and so we have to save
19059 all the call-clobbered registers as well. */
19060 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7.  Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
19070 for (reg
= 0; reg
<= max_reg
; reg
++)
19071 if (df_regs_ever_live_p (reg
)
19072 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19073 save_reg_mask
|= (1 << reg
);
19075 /* Also save the pic base register if necessary. */
19077 && !TARGET_SINGLE_PIC_BASE
19078 && arm_pic_register
!= INVALID_REGNUM
19079 && crtl
->uses_pic_offset_table
)
19080 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19082 else if (IS_VOLATILE(func_type
))
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
19089 if (frame_pointer_needed
)
19090 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19091 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19092 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19093 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19094 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19098 /* In the normal case we only need to save those registers
19099 which are call saved and which are used by this function. */
19100 for (reg
= 0; reg
<= 11; reg
++)
19101 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19102 save_reg_mask
|= (1 << reg
);
19104 /* Handle the frame pointer as a special case. */
19105 if (frame_pointer_needed
)
19106 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19108 /* If we aren't loading the PIC register,
19109 don't stack it even though it may be live. */
19111 && !TARGET_SINGLE_PIC_BASE
19112 && arm_pic_register
!= INVALID_REGNUM
19113 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19114 || crtl
->uses_pic_offset_table
))
19115 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19117 /* The prologue will copy SP into R0, so save it. */
19118 if (IS_STACKALIGN (func_type
))
19119 save_reg_mask
|= 1;
19122 /* Save registers so the exception handler can modify them. */
19123 if (crtl
->calls_eh_return
)
19129 reg
= EH_RETURN_DATA_REGNO (i
);
19130 if (reg
== INVALID_REGNUM
)
19132 save_reg_mask
|= 1 << reg
;
19136 return save_reg_mask
;
19139 /* Return true if r3 is live at the start of the function. */
19142 arm_r3_live_at_start_p (void)
19144 /* Just look at cfg info, which is still close enough to correct at this
19145 point. This gives false positives for broken functions that might use
19146 uninitialized data that happens to be allocated in r3, but who cares? */
19147 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19150 /* Compute the number of bytes used to store the static chain register on the
19151 stack, above the stack frame. We need to know this accurately to get the
19152 alignment of the rest of the stack frame correct. */
19155 arm_compute_static_chain_stack_bytes (void)
19157 /* See the defining assertion in arm_expand_prologue. */
19158 if (IS_NESTED (arm_current_func_type ())
19159 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19160 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19161 && !df_regs_ever_live_p (LR_REGNUM
)))
19162 && arm_r3_live_at_start_p ()
19163 && crtl
->args
.pretend_args_size
== 0)
/* Compute a bit mask of which core registers need to be
   saved on the stack for the current function.
   This is used by arm_compute_frame_layout, which may add extra registers.  */

static unsigned long
arm_compute_save_core_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->tail_call_emit
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes ())) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 - r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
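
/* Illustrative sketch only (guarded out of compilation, not part of the
   original sources): the iWMMXt padding rule above boils down to "if an odd
   number of words would be pushed, push one extra register so the stack
   stays 64-bit aligned".  The helper below models just that arithmetic;
   its name and parameters are hypothetical.  */
#if 0
#include <stdio.h>

/* PUSHED_WORDS is the number of 4-byte words already due to be pushed and
   PRETEND_WORDS the pretend-args area expressed in words.  Returns how many
   padding registers (0 or 1) keep the stack 8-byte aligned.  */
static int
padding_regs_needed (unsigned pushed_words, unsigned pretend_words)
{
  return ((pushed_words + pretend_words) & 1) ? 1 : 0;
}

int
main (void)
{
  printf ("%d\n", padding_regs_needed (5, 0));  /* 1: odd total, pad.  */
  printf ("%d\n", padding_regs_needed (6, 2));  /* 0: even total, no pad.  */
  return 0;
}
#endif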
19256 /* Compute a bit mask of which core registers need to be
19257 saved on the stack for the current function. */
19258 static unsigned long
19259 thumb1_compute_save_core_reg_mask (void)
19261 unsigned long mask
;
19265 for (reg
= 0; reg
< 12; reg
++)
19266 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19269 /* Handle the frame pointer as a special case. */
19270 if (frame_pointer_needed
)
19271 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19274 && !TARGET_SINGLE_PIC_BASE
19275 && arm_pic_register
!= INVALID_REGNUM
19276 && crtl
->uses_pic_offset_table
)
19277 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19279 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19280 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19281 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19283 /* LR will also be pushed if any lo regs are pushed. */
19284 if (mask
& 0xff || thumb_force_lr_save ())
19285 mask
|= (1 << LR_REGNUM
);
19287 /* Make sure we have a low work register if we need one.
19288 We will need one if we are going to push a high register,
19289 but we are not currently intending to push a low register. */
19290 if ((mask
& 0xff) == 0
19291 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19293 /* Use thumb_find_work_register to choose which register
19294 we will use. If the register is live then we will
19295 have to push it. Use LAST_LO_REGNUM as our fallback
19296 choice for the register to select. */
19297 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19298 /* Make sure the register returned by thumb_find_work_register is
19299 not part of the return value. */
19300 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19301 reg
= LAST_LO_REGNUM
;
19303 if (callee_saved_reg_p (reg
))
19307 /* The 504 below is 8 bytes less than 512 because there are two possible
19308 alignment words. We can't tell here if they will be present or not so we
19309 have to play it safe and assume that they are. */
19310 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19311 ROUND_UP_WORD (get_frame_size ()) +
19312 crtl
->outgoing_args_size
) >= 504)
19314 /* This is the same as the code in thumb1_expand_prologue() which
19315 determines which register to use for stack decrement. */
19316 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19317 if (mask
& (1 << reg
))
19320 if (reg
> LAST_LO_REGNUM
)
19322 /* Make sure we have a register available for stack decrement. */
19323 mask
|= 1 << LAST_LO_REGNUM
;
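
/* Illustrative sketch only (guarded out of compilation, not part of the
   original sources): the 504-byte test above allows for two possible
   alignment words on top of a 512-byte limit; once the interworking slot,
   the rounded frame and the outgoing arguments reach it, a low register
   must be kept free for the stack decrement.  The helper and its
   parameters are hypothetical.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static bool
needs_stack_decrement_reg (unsigned interwork_slot,
			   unsigned rounded_frame_size,
			   unsigned outgoing_args)
{
  /* 512 bytes, minus 8 for the two alignment words that may or may not
     be present, as explained in the comment above.  */
  return interwork_slot + rounded_frame_size + outgoing_args >= 504;
}

int
main (void)
{
  printf ("%d\n", needs_stack_decrement_reg (0, 480, 16));  /* 0 */
  printf ("%d\n", needs_stack_decrement_reg (4, 496, 16));  /* 1 */
  return 0;
}
#endif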
19331 /* Return the number of bytes required to save VFP registers. */
19333 arm_get_vfp_saved_size (void)
19335 unsigned int regno
;
19340 /* Space for saved VFP registers. */
19341 if (TARGET_HARD_FLOAT
)
19344 for (regno
= FIRST_VFP_REGNUM
;
19345 regno
< LAST_VFP_REGNUM
;
19348 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19349 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19353 /* Workaround ARM10 VFPr1 bug. */
19354 if (count
== 2 && !arm_arch6
)
19356 saved
+= count
* 8;
19365 if (count
== 2 && !arm_arch6
)
19367 saved
+= count
* 8;
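
/* Illustrative sketch only (guarded out of compilation, not part of the
   original sources): the VFP size computation above walks the single
   registers two at a time, charges 8 bytes per live D-register pair, and
   pads a block of exactly two D registers by one more on pre-v6 cores
   (the ARM10 VFPr1 workaround).  The array and helper below are
   hypothetical stand-ins.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

#define NUM_S_REGS 32

static unsigned
vfp_saved_size (const bool live[NUM_S_REGS], bool arch6)
{
  unsigned saved = 0, count = 0;
  for (unsigned regno = 0; regno < NUM_S_REGS; regno += 2)
    {
      if (!live[regno] && !live[regno + 1])
	{
	  /* End of a contiguous block of D registers: flush it.  */
	  if (count == 2 && !arch6)
	    count++;			/* Workaround padding.  */
	  saved += count * 8;
	  count = 0;
	}
      else
	count++;
    }
  if (count == 2 && !arch6)
    count++;
  saved += count * 8;
  return saved;
}

int
main (void)
{
  bool live[NUM_S_REGS] = { false };
  live[16] = live[17] = live[18] = live[19] = true;	/* d8 and d9 live.  */
  printf ("%u bytes\n", vfp_saved_size (live, true));	/* 16 */
  printf ("%u bytes\n", vfp_saved_size (live, false));	/* 24 */
  return 0;
}
#endif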
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  If SIMPLE_RETURN is true,
   then do not output the epilogue, because it has already been emitted in RTL.  */
19378 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19379 bool simple_return
)
19381 char conditional
[10];
19384 unsigned long live_regs_mask
;
19385 unsigned long func_type
;
19386 arm_stack_offsets
*offsets
;
19388 func_type
= arm_current_func_type ();
19390 if (IS_NAKED (func_type
))
19393 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19395 /* If this function was declared non-returning, and we have
19396 found a tail call, then we have to trust that the called
19397 function won't return. */
19402 /* Otherwise, trap an attempted return by aborting. */
19404 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19406 assemble_external_libcall (ops
[1]);
19407 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19413 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19415 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19417 cfun
->machine
->return_used_this_function
= 1;
19419 offsets
= arm_get_frame_offsets ();
19420 live_regs_mask
= offsets
->saved_regs_mask
;
19422 if (!simple_return
&& live_regs_mask
)
19424 const char * return_reg
;
19426 /* If we do not have any special requirements for function exit
19427 (e.g. interworking) then we can load the return address
19428 directly into the PC. Otherwise we must load it into LR. */
19430 && !IS_CMSE_ENTRY (func_type
)
19431 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19432 return_reg
= reg_names
[PC_REGNUM
];
19434 return_reg
= reg_names
[LR_REGNUM
];
19436 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19438 /* There are three possible reasons for the IP register
19439 being saved. 1) a stack frame was created, in which case
19440 IP contains the old stack pointer, or 2) an ISR routine
19441 corrupted it, or 3) it was saved to align the stack on
19442 iWMMXt. In case 1, restore IP into SP, otherwise just
19444 if (frame_pointer_needed
)
19446 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19447 live_regs_mask
|= (1 << SP_REGNUM
);
19450 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19453 /* On some ARM architectures it is faster to use LDR rather than
19454 LDM to load a single register. On other architectures, the
19455 cost is the same. In 26 bit mode, or for exception handlers,
19456 we have to use LDM to load the PC so that the CPSR is also
19458 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19459 if (live_regs_mask
== (1U << reg
))
19462 if (reg
<= LAST_ARM_REGNUM
19463 && (reg
!= LR_REGNUM
19465 || ! IS_INTERRUPT (func_type
)))
19467 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19468 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19475 /* Generate the load multiple instruction to restore the
19476 registers. Note we can get here, even if
19477 frame_pointer_needed is true, but only if sp already
19478 points to the base of the saved core registers. */
19479 if (live_regs_mask
& (1 << SP_REGNUM
))
19481 unsigned HOST_WIDE_INT stack_adjust
;
19483 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19484 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19486 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19487 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19490 /* If we can't use ldmib (SA110 bug),
19491 then try to pop r3 instead. */
19493 live_regs_mask
|= 1 << 3;
19495 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19498 /* For interrupt returns we have to use an LDM rather than
19499 a POP so that we can use the exception return variant. */
19500 else if (IS_INTERRUPT (func_type
))
19501 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19503 sprintf (instr
, "pop%s\t{", conditional
);
19505 p
= instr
+ strlen (instr
);
19507 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19508 if (live_regs_mask
& (1 << reg
))
19510 int l
= strlen (reg_names
[reg
]);
19516 memcpy (p
, ", ", 2);
19520 memcpy (p
, "%|", 2);
19521 memcpy (p
+ 2, reg_names
[reg
], l
);
19525 if (live_regs_mask
& (1 << LR_REGNUM
))
19527 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19528 /* If returning from an interrupt, restore the CPSR. */
19529 if (IS_INTERRUPT (func_type
))
19536 output_asm_insn (instr
, & operand
);
19538 /* See if we need to generate an extra instruction to
19539 perform the actual function return. */
19541 && func_type
!= ARM_FT_INTERWORKED
19542 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19544 /* The return has already been handled
19545 by loading the LR into the PC. */
19552 switch ((int) ARM_FUNC_TYPE (func_type
))
19556 /* ??? This is wrong for unified assembly syntax. */
19557 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19560 case ARM_FT_INTERWORKED
:
19561 gcc_assert (arm_arch5
|| arm_arch4t
);
19562 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19565 case ARM_FT_EXCEPTION
:
19566 /* ??? This is wrong for unified assembly syntax. */
19567 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19571 if (IS_CMSE_ENTRY (func_type
))
19573 /* Check if we have to clear the 'GE bits' which is only used if
19574 parallel add and subtraction instructions are available. */
19575 if (TARGET_INT_SIMD
)
19576 snprintf (instr
, sizeof (instr
),
19577 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
19579 snprintf (instr
, sizeof (instr
),
19580 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
19582 output_asm_insn (instr
, & operand
);
19583 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
19585 /* Clear the cumulative exception-status bits (0-4,7) and the
19586 condition code bits (28-31) of the FPSCR. We need to
19587 remember to clear the first scratch register used (IP) and
19588 save and restore the second (r4). */
19589 snprintf (instr
, sizeof (instr
), "push\t{%%|r4}");
19590 output_asm_insn (instr
, & operand
);
19591 snprintf (instr
, sizeof (instr
), "vmrs\t%%|ip, fpscr");
19592 output_asm_insn (instr
, & operand
);
19593 snprintf (instr
, sizeof (instr
), "movw\t%%|r4, #65376");
19594 output_asm_insn (instr
, & operand
);
19595 snprintf (instr
, sizeof (instr
), "movt\t%%|r4, #4095");
19596 output_asm_insn (instr
, & operand
);
19597 snprintf (instr
, sizeof (instr
), "and\t%%|ip, %%|r4");
19598 output_asm_insn (instr
, & operand
);
19599 snprintf (instr
, sizeof (instr
), "vmsr\tfpscr, %%|ip");
19600 output_asm_insn (instr
, & operand
);
19601 snprintf (instr
, sizeof (instr
), "pop\t{%%|r4}");
19602 output_asm_insn (instr
, & operand
);
19603 snprintf (instr
, sizeof (instr
), "mov\t%%|ip, %%|lr");
19604 output_asm_insn (instr
, & operand
);
19606 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
19608 /* Use bx if it's available. */
19609 else if (arm_arch5
|| arm_arch4t
)
19610 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19612 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19616 output_asm_insn (instr
, & operand
);
19622 /* Output in FILE asm statements needed to declare the NAME of the function
19623 defined by its DECL node. */
19626 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
19628 size_t cmse_name_len
;
19629 char *cmse_name
= 0;
19630 char cmse_prefix
[] = "__acle_se_";
19632 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19633 extra function label for each function with the 'cmse_nonsecure_entry'
19634 attribute. This extra function label should be prepended with
19635 '__acle_se_', telling the linker that it needs to create secure gateway
19636 veneers for this function. */
19637 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
19638 DECL_ATTRIBUTES (decl
)))
19640 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
19641 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
19642 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
19643 targetm
.asm_out
.globalize_label (file
, cmse_name
);
19645 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
19646 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
19649 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
19650 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
19651 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
19652 ASM_OUTPUT_LABEL (file
, name
);
19655 ASM_OUTPUT_LABEL (file
, cmse_name
);
19657 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
19660 /* Write the function name into the code section, directly preceding
19661 the function prologue.
19663 Code will be output similar to this:
19665 .ascii "arm_poke_function_name", 0
19668 .word 0xff000000 + (t1 - t0)
19669 arm_poke_function_name
19671 stmfd sp!, {fp, ip, lr, pc}
19674 When performing a stack backtrace, code can inspect the value
19675 of 'pc' stored at 'fp' + 0. If the trace function then looks
19676 at location pc - 12 and the top 8 bits are set, then we know
19677 that there is a function name embedded immediately preceding this
19678 location and has length ((pc[-3]) & 0xff000000).
19680 We assume that pc is declared as a pointer to an unsigned long.
19682 It is of no benefit to output the function name if we are assembling
19683 a leaf function. These function types will not contain a stack
19684 backtrace structure, therefore it is not possible to determine the
19687 arm_poke_function_name (FILE *stream
, const char *name
)
19689 unsigned long alignlength
;
19690 unsigned long length
;
19693 length
= strlen (name
) + 1;
19694 alignlength
= ROUND_UP_WORD (length
);
19696 ASM_OUTPUT_ASCII (stream
, name
, length
);
19697 ASM_OUTPUT_ALIGN (stream
, 2);
19698 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19699 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19702 /* Place some comments into the assembler stream
19703 describing the current function. */
19705 arm_output_function_prologue (FILE *f
)
19707 unsigned long func_type
;
19709 /* Sanity check. */
19710 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19712 func_type
= arm_current_func_type ();
19714 switch ((int) ARM_FUNC_TYPE (func_type
))
19717 case ARM_FT_NORMAL
:
19719 case ARM_FT_INTERWORKED
:
19720 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19723 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19726 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19728 case ARM_FT_EXCEPTION
:
19729 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19733 if (IS_NAKED (func_type
))
19734 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19736 if (IS_VOLATILE (func_type
))
19737 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19739 if (IS_NESTED (func_type
))
19740 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19741 if (IS_STACKALIGN (func_type
))
19742 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19743 if (IS_CMSE_ENTRY (func_type
))
19744 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
19746 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19748 crtl
->args
.pretend_args_size
,
19749 (HOST_WIDE_INT
) get_frame_size ());
19751 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19752 frame_pointer_needed
,
19753 cfun
->machine
->uses_anonymous_args
);
19755 if (cfun
->machine
->lr_save_eliminated
)
19756 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19758 if (crtl
->calls_eh_return
)
19759 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19764 arm_output_function_epilogue (FILE *)
19766 arm_stack_offsets
*offsets
;
19772 /* Emit any call-via-reg trampolines that are needed for v4t support
19773 of call_reg and call_value_reg type insns. */
19774 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19776 rtx label
= cfun
->machine
->call_via
[regno
];
19780 switch_to_section (function_section (current_function_decl
));
19781 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19782 CODE_LABEL_NUMBER (label
));
19783 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19787 /* ??? Probably not safe to set this here, since it assumes that a
19788 function will be emitted as assembly immediately after we generate
19789 RTL for it. This does not happen for inline functions. */
19790 cfun
->machine
->return_used_this_function
= 0;
19792 else /* TARGET_32BIT */
19794 /* We need to take into account any stack-frame rounding. */
19795 offsets
= arm_get_frame_offsets ();
19797 gcc_assert (!use_return_insn (FALSE
, NULL
)
19798 || (cfun
->machine
->return_used_this_function
!= 0)
19799 || offsets
->saved_regs
== offsets
->outgoing_args
19800 || frame_pointer_needed
);
19804 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19805 STR and STRD. If an even number of registers are being pushed, one
19806 or more STRD patterns are created for each register pair. If an
19807 odd number of registers are pushed, emit an initial STR followed by
19808 as many STRD instructions as are needed. This works best when the
19809 stack is initially 64-bit aligned (the normal case), since it
19810 ensures that each STRD is also 64-bit aligned. */
19812 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19817 rtx par
= NULL_RTX
;
19818 rtx dwarf
= NULL_RTX
;
19822 num_regs
= bit_count (saved_regs_mask
);
19824 /* Must be at least one register to save, and can't save SP or PC. */
19825 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19826 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19827 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19829 /* Create sequence for DWARF info. All the frame-related data for
19830 debugging is held in this wrapper. */
19831 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19833 /* Describe the stack adjustment. */
19834 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19835 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19836 RTX_FRAME_RELATED_P (tmp
) = 1;
19837 XVECEXP (dwarf
, 0, 0) = tmp
;
19839 /* Find the first register. */
19840 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19845 /* If there's an odd number of registers to push. Start off by
19846 pushing a single register. This ensures that subsequent strd
19847 operations are dword aligned (assuming that SP was originally
19848 64-bit aligned). */
19849 if ((num_regs
& 1) != 0)
19851 rtx reg
, mem
, insn
;
19853 reg
= gen_rtx_REG (SImode
, regno
);
19855 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19856 stack_pointer_rtx
));
19858 mem
= gen_frame_mem (Pmode
,
19860 (Pmode
, stack_pointer_rtx
,
19861 plus_constant (Pmode
, stack_pointer_rtx
,
19864 tmp
= gen_rtx_SET (mem
, reg
);
19865 RTX_FRAME_RELATED_P (tmp
) = 1;
19866 insn
= emit_insn (tmp
);
19867 RTX_FRAME_RELATED_P (insn
) = 1;
19868 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19869 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19870 RTX_FRAME_RELATED_P (tmp
) = 1;
19873 XVECEXP (dwarf
, 0, i
) = tmp
;
19877 while (i
< num_regs
)
19878 if (saved_regs_mask
& (1 << regno
))
19880 rtx reg1
, reg2
, mem1
, mem2
;
19881 rtx tmp0
, tmp1
, tmp2
;
19884 /* Find the register to pair with this one. */
19885 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19889 reg1
= gen_rtx_REG (SImode
, regno
);
19890 reg2
= gen_rtx_REG (SImode
, regno2
);
19897 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19900 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19902 -4 * (num_regs
- 1)));
19903 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19904 plus_constant (Pmode
, stack_pointer_rtx
,
19906 tmp1
= gen_rtx_SET (mem1
, reg1
);
19907 tmp2
= gen_rtx_SET (mem2
, reg2
);
19908 RTX_FRAME_RELATED_P (tmp0
) = 1;
19909 RTX_FRAME_RELATED_P (tmp1
) = 1;
19910 RTX_FRAME_RELATED_P (tmp2
) = 1;
19911 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19912 XVECEXP (par
, 0, 0) = tmp0
;
19913 XVECEXP (par
, 0, 1) = tmp1
;
19914 XVECEXP (par
, 0, 2) = tmp2
;
19915 insn
= emit_insn (par
);
19916 RTX_FRAME_RELATED_P (insn
) = 1;
19917 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19921 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19924 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19927 tmp1
= gen_rtx_SET (mem1
, reg1
);
19928 tmp2
= gen_rtx_SET (mem2
, reg2
);
19929 RTX_FRAME_RELATED_P (tmp1
) = 1;
19930 RTX_FRAME_RELATED_P (tmp2
) = 1;
19931 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19932 XVECEXP (par
, 0, 0) = tmp1
;
19933 XVECEXP (par
, 0, 1) = tmp2
;
19937 /* Create unwind information. This is an approximation. */
19938 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
19939 plus_constant (Pmode
,
19943 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
19944 plus_constant (Pmode
,
19949 RTX_FRAME_RELATED_P (tmp1
) = 1;
19950 RTX_FRAME_RELATED_P (tmp2
) = 1;
19951 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19952 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19954 regno
= regno2
+ 1;
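
/* Illustrative sketch only (guarded out of compilation, not part of the
   original sources): the push logic above first emits one single store when
   an odd number of registers is pushed, so that every following STRD lands
   on a 64-bit boundary, and then walks the mask pairing the remaining
   registers (the pairs need not be consecutive on Thumb-2).  This
   standalone model just prints that schedule; all names are hypothetical.  */
#if 0
#include <stdio.h>

static void
print_strd_push_schedule (unsigned long mask)
{
  unsigned num_regs = 0;
  for (int r = 0; r < 16; r++)
    if (mask & (1UL << r))
      num_regs++;

  int r = 0;
  if (num_regs & 1)
    {
      while ((mask & (1UL << r)) == 0)
	r++;
      printf ("str  r%d\n", r);		/* Leading single store.  */
      r++;
    }
  while (r < 16)
    {
      if (mask & (1UL << r))
	{
	  int r2 = r + 1;
	  while ((mask & (1UL << r2)) == 0)
	    r2++;
	  printf ("strd r%d, r%d\n", r, r2);
	  r = r2 + 1;
	}
      else
	r++;
    }
}

int
main (void)
{
  /* r4-r7 and r14: five registers, so one STR followed by two STRDs.  */
  print_strd_push_schedule ((1 << 4) | (1 << 5) | (1 << 6) | (1 << 7)
			    | (1 << 14));
  return 0;
}
#endif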
/* STRD in ARM mode requires consecutive registers.  This function emits STRD
   whenever possible, otherwise it emits single-word stores.  The first store
   also allocates stack space for all saved registers, using writeback with
   post-addressing mode.  All other stores use offset addressing.  If no STRD
   can be emitted, this function emits a sequence of single-word stores,
   and not an STM as before, because single-word stores provide more freedom
   for scheduling and can be turned into an STM by peephole optimizations.  */
19970 arm_emit_strd_push (unsigned long saved_regs_mask
)
19973 int i
, j
, dwarf_index
= 0;
19975 rtx dwarf
= NULL_RTX
;
19976 rtx insn
= NULL_RTX
;
  /* TODO: More efficient code can be emitted by changing the
     layout, e.g., first push all pairs that can use STRD to keep the
     stack aligned, and then push all other registers.  */
19982 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19983 if (saved_regs_mask
& (1 << i
))
19986 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19987 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19988 gcc_assert (num_regs
> 0);
19990 /* Create sequence for DWARF info. */
19991 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19993 /* For dwarf info, we generate explicit stack update. */
19994 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19995 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19996 RTX_FRAME_RELATED_P (tmp
) = 1;
19997 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19999 /* Save registers. */
20000 offset
= - 4 * num_regs
;
20002 while (j
<= LAST_ARM_REGNUM
)
20003 if (saved_regs_mask
& (1 << j
))
20006 && (saved_regs_mask
& (1 << (j
+ 1))))
20008 /* Current register and previous register form register pair for
20009 which STRD can be generated. */
20012 /* Allocate stack space for all saved registers. */
20013 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20014 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20015 mem
= gen_frame_mem (DImode
, tmp
);
20018 else if (offset
> 0)
20019 mem
= gen_frame_mem (DImode
,
20020 plus_constant (Pmode
,
20024 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20026 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
20027 RTX_FRAME_RELATED_P (tmp
) = 1;
20028 tmp
= emit_insn (tmp
);
20030 /* Record the first store insn. */
20031 if (dwarf_index
== 1)
20034 /* Generate dwarf info. */
20035 mem
= gen_frame_mem (SImode
,
20036 plus_constant (Pmode
,
20039 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20040 RTX_FRAME_RELATED_P (tmp
) = 1;
20041 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20043 mem
= gen_frame_mem (SImode
,
20044 plus_constant (Pmode
,
20047 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20048 RTX_FRAME_RELATED_P (tmp
) = 1;
20049 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20056 /* Emit a single word store. */
20059 /* Allocate stack space for all saved registers. */
20060 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20061 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20062 mem
= gen_frame_mem (SImode
, tmp
);
20065 else if (offset
> 0)
20066 mem
= gen_frame_mem (SImode
,
20067 plus_constant (Pmode
,
20071 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20073 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20074 RTX_FRAME_RELATED_P (tmp
) = 1;
20075 tmp
= emit_insn (tmp
);
20077 /* Record the first store insn. */
20078 if (dwarf_index
== 1)
20081 /* Generate dwarf info. */
20082 mem
= gen_frame_mem (SImode
,
20083 plus_constant(Pmode
,
20086 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20087 RTX_FRAME_RELATED_P (tmp
) = 1;
20088 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20097 /* Attach dwarf info to the first insn we generate. */
20098 gcc_assert (insn
!= NULL_RTX
);
20099 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20100 RTX_FRAME_RELATED_P (insn
) = 1;
20103 /* Generate and emit an insn that we will recognize as a push_multi.
20104 Unfortunately, since this insn does not reflect very well the actual
20105 semantics of the operation, we need to annotate the insn for the benefit
20106 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20107 MASK for registers that should be annotated for DWARF2 frame unwind
20110 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20113 int num_dwarf_regs
= 0;
20117 int dwarf_par_index
;
20120 /* We don't record the PC in the dwarf frame information. */
20121 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20123 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20125 if (mask
& (1 << i
))
20127 if (dwarf_regs_mask
& (1 << i
))
20131 gcc_assert (num_regs
&& num_regs
<= 16);
20132 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20134 /* For the body of the insn we are going to generate an UNSPEC in
20135 parallel with several USEs. This allows the insn to be recognized
20136 by the push_multi pattern in the arm.md file.
20138 The body of the insn looks something like this:
20141 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20142 (const_int:SI <num>)))
20143 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20149 For the frame note however, we try to be more explicit and actually
20150 show each register being stored into the stack frame, plus a (single)
20151 decrement of the stack pointer. We do it this way in order to be
20152 friendly to the stack unwinding code, which only wants to see a single
20153 stack decrement per instruction. The RTL we generate for the note looks
20154 something like this:
20157 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20158 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20159 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20160 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20164 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20165 instead we'd have a parallel expression detailing all
20166 the stores to the various memory addresses so that debug
20167 information is more up-to-date. Remember however while writing
20168 this to take care of the constraints with the push instruction.
20170 Note also that this has to be taken care of for the VFP registers.
20172 For more see PR43399. */
20174 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20175 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20176 dwarf_par_index
= 1;
20178 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20180 if (mask
& (1 << i
))
20182 reg
= gen_rtx_REG (SImode
, i
);
20184 XVECEXP (par
, 0, 0)
20185 = gen_rtx_SET (gen_frame_mem
20187 gen_rtx_PRE_MODIFY (Pmode
,
20190 (Pmode
, stack_pointer_rtx
,
20193 gen_rtx_UNSPEC (BLKmode
,
20194 gen_rtvec (1, reg
),
20195 UNSPEC_PUSH_MULT
));
20197 if (dwarf_regs_mask
& (1 << i
))
20199 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20201 RTX_FRAME_RELATED_P (tmp
) = 1;
20202 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20209 for (j
= 1, i
++; j
< num_regs
; i
++)
20211 if (mask
& (1 << i
))
20213 reg
= gen_rtx_REG (SImode
, i
);
20215 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20217 if (dwarf_regs_mask
& (1 << i
))
20220 = gen_rtx_SET (gen_frame_mem
20222 plus_constant (Pmode
, stack_pointer_rtx
,
20225 RTX_FRAME_RELATED_P (tmp
) = 1;
20226 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20233 par
= emit_insn (par
);
20235 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20236 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20237 RTX_FRAME_RELATED_P (tmp
) = 1;
20238 XVECEXP (dwarf
, 0, 0) = tmp
;
20240 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
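
/* Illustrative sketch only (guarded out of compilation, not part of the
   original sources): the frame note built above describes one stack
   decrement of 4 * num_regs followed by each register stored at an
   increasing offset from the new SP, which is what the unwinder wants to
   see even though the instruction itself is a single push.  The model
   below prints those per-register offsets; names are hypothetical.  */
#if 0
#include <stdio.h>

static void
print_push_frame_note (unsigned long mask)
{
  unsigned num_regs = 0;
  for (int r = 0; r < 16; r++)
    if (mask & (1UL << r))
      num_regs++;

  printf ("sp <- sp - %u\n", 4 * num_regs);	/* Single stack decrement.  */

  unsigned slot = 0;
  for (int r = 0; r < 16; r++)
    if (mask & (1UL << r))
      printf ("[sp + %u] <- r%d\n", 4 * slot++, r);
}

int
main (void)
{
  print_push_frame_note ((1 << 4) | (1 << 5) | (1 << 14));
  return 0;
}
#endif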
/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
   SIZE is the offset to be adjusted.
   DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx.  */

static void
arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
{
  rtx dwarf;

  RTX_FRAME_RELATED_P (insn) = 1;
  dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
  add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
}
20258 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20259 SAVED_REGS_MASK shows which registers need to be restored.
20261 Unfortunately, since this insn does not reflect very well the actual
20262 semantics of the operation, we need to annotate the insn for the benefit
20263 of DWARF2 frame unwind information. */
20265 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20270 rtx dwarf
= NULL_RTX
;
20272 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20276 offset_adj
= return_in_pc
? 1 : 0;
20277 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20278 if (saved_regs_mask
& (1 << i
))
20281 gcc_assert (num_regs
&& num_regs
<= 16);
20283 /* If SP is in reglist, then we don't emit SP update insn. */
20284 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20286 /* The parallel needs to hold num_regs SETs
20287 and one SET for the stack update. */
20288 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20291 XVECEXP (par
, 0, 0) = ret_rtx
;
20295 /* Increment the stack pointer, based on there being
20296 num_regs 4-byte registers to restore. */
20297 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20298 plus_constant (Pmode
,
20301 RTX_FRAME_RELATED_P (tmp
) = 1;
20302 XVECEXP (par
, 0, offset_adj
) = tmp
;
20305 /* Now restore every reg, which may include PC. */
20306 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20307 if (saved_regs_mask
& (1 << i
))
20309 reg
= gen_rtx_REG (SImode
, i
);
20310 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20312 /* Emit single load with writeback. */
20313 tmp
= gen_frame_mem (SImode
,
20314 gen_rtx_POST_INC (Pmode
,
20315 stack_pointer_rtx
));
20316 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20317 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20321 tmp
= gen_rtx_SET (reg
,
20324 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20325 RTX_FRAME_RELATED_P (tmp
) = 1;
20326 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20328 /* We need to maintain a sequence for DWARF info too. As dwarf info
20329 should not have PC, skip PC. */
20330 if (i
!= PC_REGNUM
)
20331 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20337 par
= emit_jump_insn (par
);
20339 par
= emit_insn (par
);
20341 REG_NOTES (par
) = dwarf
;
20343 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20344 stack_pointer_rtx
, stack_pointer_rtx
);
20347 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20348 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20350 Unfortunately, since this insn does not reflect very well the actual
20351 semantics of the operation, we need to annotate the insn for the benefit
20352 of DWARF2 frame unwind information. */
20354 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20358 rtx dwarf
= NULL_RTX
;
20361 gcc_assert (num_regs
&& num_regs
<= 32);
20363 /* Workaround ARM10 VFPr1 bug. */
20364 if (num_regs
== 2 && !arm_arch6
)
20366 if (first_reg
== 15)
20372 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20373 there could be up to 32 D-registers to restore.
20374 If there are more than 16 D-registers, make two recursive calls,
20375 each of which emits one pop_multi instruction. */
20378 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20379 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20383 /* The parallel needs to hold num_regs SETs
20384 and one SET for the stack update. */
20385 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20387 /* Increment the stack pointer, based on there being
20388 num_regs 8-byte registers to restore. */
20389 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20390 RTX_FRAME_RELATED_P (tmp
) = 1;
20391 XVECEXP (par
, 0, 0) = tmp
;
20393 /* Now show every reg that will be restored, using a SET for each. */
20394 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20396 reg
= gen_rtx_REG (DFmode
, i
);
20398 tmp
= gen_rtx_SET (reg
,
20401 plus_constant (Pmode
, base_reg
, 8 * j
)));
20402 RTX_FRAME_RELATED_P (tmp
) = 1;
20403 XVECEXP (par
, 0, j
+ 1) = tmp
;
20405 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20410 par
= emit_insn (par
);
20411 REG_NOTES (par
) = dwarf
;
  /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP.  */
20414 if (REGNO (base_reg
) == IP_REGNUM
)
20416 RTX_FRAME_RELATED_P (par
) = 1;
20417 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20420 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20421 base_reg
, base_reg
);
20424 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20425 number of registers are being popped, multiple LDRD patterns are created for
20426 all register pairs. If odd number of registers are popped, last register is
20427 loaded by using LDR pattern. */
20429 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20433 rtx par
= NULL_RTX
;
20434 rtx dwarf
= NULL_RTX
;
20435 rtx tmp
, reg
, tmp1
;
20436 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20438 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20439 if (saved_regs_mask
& (1 << i
))
20442 gcc_assert (num_regs
&& num_regs
<= 16);
20444 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20445 to be popped. So, if num_regs is even, now it will become odd,
20446 and we can generate pop with PC. If num_regs is odd, it will be
20447 even now, and ldr with return can be generated for PC. */
20451 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20453 /* Var j iterates over all the registers to gather all the registers in
20454 saved_regs_mask. Var i gives index of saved registers in stack frame.
20455 A PARALLEL RTX of register-pair is created here, so that pattern for
20456 LDRD can be matched. As PC is always last register to be popped, and
20457 we have already decremented num_regs if PC, we don't have to worry
20458 about PC in this loop. */
20459 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20460 if (saved_regs_mask
& (1 << j
))
20462 /* Create RTX for memory load. */
20463 reg
= gen_rtx_REG (SImode
, j
);
20464 tmp
= gen_rtx_SET (reg
,
20465 gen_frame_mem (SImode
,
20466 plus_constant (Pmode
,
20467 stack_pointer_rtx
, 4 * i
)));
20468 RTX_FRAME_RELATED_P (tmp
) = 1;
20472 /* When saved-register index (i) is even, the RTX to be emitted is
20473 yet to be created. Hence create it first. The LDRD pattern we
20474 are generating is :
20475 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20476 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20477 where target registers need not be consecutive. */
20478 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20482 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20483 added as 0th element and if i is odd, reg_i is added as 1st element
20484 of LDRD pattern shown above. */
20485 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20486 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20490 /* When saved-register index (i) is odd, RTXs for both the registers
20491 to be loaded are generated in above given LDRD pattern, and the
20492 pattern can be emitted now. */
20493 par
= emit_insn (par
);
20494 REG_NOTES (par
) = dwarf
;
20495 RTX_FRAME_RELATED_P (par
) = 1;
20501 /* If the number of registers pushed is odd AND return_in_pc is false OR
20502 number of registers are even AND return_in_pc is true, last register is
20503 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20504 then LDR with post increment. */
20506 /* Increment the stack pointer, based on there being
20507 num_regs 4-byte registers to restore. */
20508 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20509 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20510 RTX_FRAME_RELATED_P (tmp
) = 1;
20511 tmp
= emit_insn (tmp
);
20514 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20515 stack_pointer_rtx
, stack_pointer_rtx
);
20520 if (((num_regs
% 2) == 1 && !return_in_pc
)
20521 || ((num_regs
% 2) == 0 && return_in_pc
))
20523 /* Scan for the single register to be popped. Skip until the saved
20524 register is found. */
20525 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20527 /* Gen LDR with post increment here. */
20528 tmp1
= gen_rtx_MEM (SImode
,
20529 gen_rtx_POST_INC (SImode
,
20530 stack_pointer_rtx
));
20531 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20533 reg
= gen_rtx_REG (SImode
, j
);
20534 tmp
= gen_rtx_SET (reg
, tmp1
);
20535 RTX_FRAME_RELATED_P (tmp
) = 1;
20536 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20540 /* If return_in_pc, j must be PC_REGNUM. */
20541 gcc_assert (j
== PC_REGNUM
);
20542 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20543 XVECEXP (par
, 0, 0) = ret_rtx
;
20544 XVECEXP (par
, 0, 1) = tmp
;
20545 par
= emit_jump_insn (par
);
20549 par
= emit_insn (tmp
);
20550 REG_NOTES (par
) = dwarf
;
20551 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20552 stack_pointer_rtx
, stack_pointer_rtx
);
20556 else if ((num_regs
% 2) == 1 && return_in_pc
)
20558 /* There are 2 registers to be popped. So, generate the pattern
20559 pop_multiple_with_stack_update_and_return to pop in PC. */
20560 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
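
/* Illustrative sketch only (guarded out of compilation, not part of the
   original sources): after pairing registers for LDRD, one register may be
   left over.  The rule used above is: a trailing single LDR is needed when
   the (already PC-adjusted) count is odd and the return is not through PC,
   or when that count is even and the return is through PC, since PC can
   never be part of an LDRD pair.  Hypothetical standalone check:  */
#if 0
#include <stdbool.h>
#include <stdio.h>

/* NUM_REGS is the register count after PC has already been removed from it
   when RETURN_IN_PC is true, mirroring the adjustment made above.  */
static bool
last_reg_needs_single_ldr (unsigned num_regs, bool return_in_pc)
{
  return ((num_regs % 2) == 1 && !return_in_pc)
	 || ((num_regs % 2) == 0 && return_in_pc);
}

int
main (void)
{
  printf ("%d\n", last_reg_needs_single_ldr (3, false));  /* 1 */
  printf ("%d\n", last_reg_needs_single_ldr (2, true));   /* 1: PC via LDR.  */
  printf ("%d\n", last_reg_needs_single_ldr (3, true));   /* 0: final pop_multi.  */
  return 0;
}
#endif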
/* LDRD in ARM mode needs consecutive registers as operands.  This function
   emits LDRD whenever possible, otherwise it emits single-word loads.  It uses
   offset addressing and then generates one separate stack update.  This
   provides more scheduling freedom, compared to writeback on every load.
   However, if the function returns using a load into PC directly
   (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
   before the last load.  TODO: Add a peephole optimization to recognize
   the new epilogue sequence as an LDM instruction whenever possible.
   TODO: Add a peephole optimization to merge the load at stack-offset zero
   with the stack update instruction using load with writeback
   in post-index addressing mode.  */
20578 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20582 rtx par
= NULL_RTX
;
20583 rtx dwarf
= NULL_RTX
;
20586 /* Restore saved registers. */
20587 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20589 while (j
<= LAST_ARM_REGNUM
)
20590 if (saved_regs_mask
& (1 << j
))
20593 && (saved_regs_mask
& (1 << (j
+ 1)))
20594 && (j
+ 1) != PC_REGNUM
)
20596 /* Current register and next register form register pair for which
20597 LDRD can be generated. PC is always the last register popped, and
20598 we handle it separately. */
20600 mem
= gen_frame_mem (DImode
,
20601 plus_constant (Pmode
,
20605 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20607 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20608 tmp
= emit_insn (tmp
);
20609 RTX_FRAME_RELATED_P (tmp
) = 1;
20611 /* Generate dwarf info. */
20613 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20614 gen_rtx_REG (SImode
, j
),
20616 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20617 gen_rtx_REG (SImode
, j
+ 1),
20620 REG_NOTES (tmp
) = dwarf
;
20625 else if (j
!= PC_REGNUM
)
20627 /* Emit a single word load. */
20629 mem
= gen_frame_mem (SImode
,
20630 plus_constant (Pmode
,
20634 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20636 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20637 tmp
= emit_insn (tmp
);
20638 RTX_FRAME_RELATED_P (tmp
) = 1;
20640 /* Generate dwarf info. */
20641 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20642 gen_rtx_REG (SImode
, j
),
20648 else /* j == PC_REGNUM */
20654 /* Update the stack. */
20657 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20658 plus_constant (Pmode
,
20661 tmp
= emit_insn (tmp
);
20662 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20663 stack_pointer_rtx
, stack_pointer_rtx
);
20667 if (saved_regs_mask
& (1 << PC_REGNUM
))
20669 /* Only PC is to be popped. */
20670 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20671 XVECEXP (par
, 0, 0) = ret_rtx
;
20672 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20673 gen_frame_mem (SImode
,
20674 gen_rtx_POST_INC (SImode
,
20675 stack_pointer_rtx
)));
20676 RTX_FRAME_RELATED_P (tmp
) = 1;
20677 XVECEXP (par
, 0, 1) = tmp
;
20678 par
= emit_jump_insn (par
);
20680 /* Generate dwarf info. */
20681 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20682 gen_rtx_REG (SImode
, PC_REGNUM
),
20684 REG_NOTES (par
) = dwarf
;
20685 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20686 stack_pointer_rtx
, stack_pointer_rtx
);
/* Calculate the size of the return value that is passed in registers.  */

static unsigned
arm_size_return_regs (void)
{
  machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
/* Return true if the current function needs to save/restore LR.  */

static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
	 && (!crtl->is_leaf
	     || thumb_far_jump_used_p ()
	     || df_regs_ever_live_p (LR_REGNUM));
}
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.  */

static bool
is_indirect_tailcall_p (rtx call)
{
  rtx pat = PATTERN (call);

  /* Indirect tail call.  */
  pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) == SET)
    pat = SET_SRC (pat);

  pat = XEXP (XEXP (pat, 0), 0);
  return REG_P (pat);
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */

static bool
any_sibcall_could_use_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (e->flags & EDGE_SIBCALL)
      {
	rtx_insn *call = BB_END (e->src);
	if (!CALL_P (call))
	  call = prev_nonnote_nondebug_insn (call);
	gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
	if (find_regno_fusage (call, USE, 3)
	    || is_indirect_tailcall_p (call))
	  return true;
      }

  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    |  saved arguments for
                            |    |  vararg functions
                              --
   hard FP & arg pointer -> |    |  stack
                            |    |  frame
                              --
                            |    |  call saved
      soft frame pointer -> |    |  registers
                              --
                            |    |  local
     locals base pointer -> |    |  variables
                              --
                            |    |  outgoing
   current stack pointer -> |    |  arguments
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue () and arm_compute_save_core_reg_mask ().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Return cached stack offsets.  */

static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;

  offsets = &cfun->machine->stack_offsets;

  return offsets;
}
20818 /* Calculate stack offsets. These are used to calculate register elimination
20819 offsets and in prologue/epilogue code. Also calculates which registers
20820 should be saved. */
20823 arm_compute_frame_layout (void)
20825 struct arm_stack_offsets
*offsets
;
20826 unsigned long func_type
;
20829 HOST_WIDE_INT frame_size
;
20832 offsets
= &cfun
->machine
->stack_offsets
;
  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
20836 frame_size
= ROUND_UP_WORD (get_frame_size ());
20838 /* Space for variadic functions. */
20839 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20841 /* In Thumb mode this is incorrect, but never used. */
20843 = (offsets
->saved_args
20844 + arm_compute_static_chain_stack_bytes ()
20845 + (frame_pointer_needed
? 4 : 0));
20849 unsigned int regno
;
20851 offsets
->saved_regs_mask
= arm_compute_save_core_reg_mask ();
20852 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20853 saved
= core_saved
;
20855 /* We know that SP will be doubleword aligned on entry, and we must
20856 preserve that condition at any subroutine call. We also require the
20857 soft frame pointer to be doubleword aligned. */
20859 if (TARGET_REALLY_IWMMXT
)
20861 /* Check for the call-saved iWMMXt registers. */
20862 for (regno
= FIRST_IWMMXT_REGNUM
;
20863 regno
<= LAST_IWMMXT_REGNUM
;
20865 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20869 func_type
= arm_current_func_type ();
20870 /* Space for saved VFP registers. */
20871 if (! IS_VOLATILE (func_type
)
20872 && TARGET_HARD_FLOAT
)
20873 saved
+= arm_get_vfp_saved_size ();
20875 else /* TARGET_THUMB1 */
20877 offsets
->saved_regs_mask
= thumb1_compute_save_core_reg_mask ();
20878 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20879 saved
= core_saved
;
20880 if (TARGET_BACKTRACE
)
20884 /* Saved registers include the stack frame. */
20885 offsets
->saved_regs
20886 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20887 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20889 /* A leaf function does not need any stack alignment if it has nothing
20891 if (crtl
->is_leaf
&& frame_size
== 0
20892 /* However if it calls alloca(), we have a dynamically allocated
20893 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20894 && ! cfun
->calls_alloca
)
20896 offsets
->outgoing_args
= offsets
->soft_frame
;
20897 offsets
->locals_base
= offsets
->soft_frame
;
20901 /* Ensure SFP has the correct alignment. */
20902 if (ARM_DOUBLEWORD_ALIGN
20903 && (offsets
->soft_frame
& 7))
20905 offsets
->soft_frame
+= 4;
20906 /* Try to align stack by pushing an extra reg. Don't bother doing this
20907 when there is a stack frame as the alignment will be rolled into
20908 the normal stack adjustment. */
20909 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20913 /* Register r3 is caller-saved. Normally it does not need to be
20914 saved on entry by the prologue. However if we choose to save
20915 it for padding then we may confuse the compiler into thinking
20916 a prologue sequence is required when in fact it is not. This
20917 will occur when shrink-wrapping if r3 is used as a scratch
20918 register and there are no other callee-saved writes.
20920 This situation can be avoided when other callee-saved registers
20921 are available and r3 is not mandatory if we choose a callee-saved
20922 register for padding. */
20923 bool prefer_callee_reg_p
= false;
20925 /* If it is safe to use r3, then do so. This sometimes
20926 generates better code on Thumb-2 by avoiding the need to
20927 use 32-bit push/pop instructions. */
20928 if (! any_sibcall_could_use_r3 ()
20929 && arm_size_return_regs () <= 12
20930 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20932 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20935 if (!TARGET_THUMB2
)
20936 prefer_callee_reg_p
= true;
20939 || prefer_callee_reg_p
)
20941 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20943 /* Avoid fixed registers; they may be changed at
20944 arbitrary times so it's unsafe to restore them
20945 during the epilogue. */
20947 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20957 offsets
->saved_regs
+= 4;
20958 offsets
->saved_regs_mask
|= (1 << reg
);
20963 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20964 offsets
->outgoing_args
= (offsets
->locals_base
20965 + crtl
->outgoing_args_size
);
20967 if (ARM_DOUBLEWORD_ALIGN
)
20969 /* Ensure SP remains doubleword aligned. */
20970 if (offsets
->outgoing_args
& 7)
20971 offsets
->outgoing_args
+= 4;
20972 gcc_assert (!(offsets
->outgoing_args
& 7));
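
/* Illustrative sketch only (guarded out of compilation, not part of the
   original sources): the layout code above keeps both the soft frame
   pointer and the outgoing-argument area doubleword aligned by bumping a
   word-aligned offset up to the next multiple of 8 when needed.  The
   helper name is hypothetical.  */
#if 0
#include <stdio.h>

static unsigned
align_up_dword (unsigned offset)
{
  /* Offsets here are already word (4-byte) aligned, so a single +4 is
     enough to reach the next 8-byte boundary, as in the code above.  */
  if (offset & 7)
    offset += 4;
  return offset;
}

int
main (void)
{
  printf ("%u\n", align_up_dword (20));  /* 24 */
  printf ("%u\n", align_up_dword (24));  /* 24 */
  return 0;
}
#endif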
20977 /* Calculate the relative offsets for the different stack pointers. Positive
20978 offsets are in the direction of stack growth. */
20981 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20983 arm_stack_offsets
*offsets
;
20985 offsets
= arm_get_frame_offsets ();
20987 /* OK, now we have enough information to compute the distances.
20988 There must be an entry in these switch tables for each pair
20989 of registers in ELIMINABLE_REGS, even if some of the entries
20990 seem to be redundant or useless. */
20993 case ARG_POINTER_REGNUM
:
20996 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20999 case FRAME_POINTER_REGNUM
:
21000 /* This is the reverse of the soft frame pointer
21001 to hard frame pointer elimination below. */
21002 return offsets
->soft_frame
- offsets
->saved_args
;
21004 case ARM_HARD_FRAME_POINTER_REGNUM
:
21005 /* This is only non-zero in the case where the static chain register
21006 is stored above the frame. */
21007 return offsets
->frame
- offsets
->saved_args
- 4;
21009 case STACK_POINTER_REGNUM
:
21010 /* If nothing has been pushed on the stack at all
21011 then this will return -4. This *is* correct! */
21012 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
21015 gcc_unreachable ();
21017 gcc_unreachable ();
21019 case FRAME_POINTER_REGNUM
:
21022 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21025 case ARM_HARD_FRAME_POINTER_REGNUM
:
21026 /* The hard frame pointer points to the top entry in the
21027 stack frame. The soft frame pointer to the bottom entry
21028 in the stack frame. If there is no stack frame at all,
21029 then they are identical. */
21031 return offsets
->frame
- offsets
->soft_frame
;
21033 case STACK_POINTER_REGNUM
:
21034 return offsets
->outgoing_args
- offsets
->soft_frame
;
21037 gcc_unreachable ();
21039 gcc_unreachable ();
21042 /* You cannot eliminate from the stack pointer.
21043 In theory you could eliminate from the hard frame
21044 pointer to the stack pointer, but this will never
21045 happen, since if a stack frame is not needed the
21046 hard frame pointer will never be used. */
21047 gcc_unreachable ();
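
/* Illustrative sketch only (guarded out of compilation, not part of the
   original sources): the elimination offsets returned above are simple
   differences between the cached layout offsets.  The model below
   reproduces two of the cases (ARG_POINTER -> STACK_POINTER and soft
   FRAME_POINTER -> STACK_POINTER) for a made-up layout; the struct and
   the values are hypothetical.  */
#if 0
#include <stdio.h>

struct layout
{
  int saved_args;	/* Bottom of the pretend-args area.  */
  int soft_frame;	/* Soft frame pointer offset.  */
  int outgoing_args;	/* Top of the outgoing-argument area.  */
};

int
main (void)
{
  struct layout l = { 0, 24, 64 };

  /* ARG_POINTER -> STACK_POINTER, as in the switch above.  */
  printf ("%d\n", l.outgoing_args - (l.saved_args + 4));	/* 60 */

  /* Soft FRAME_POINTER -> STACK_POINTER.  */
  printf ("%d\n", l.outgoing_args - l.soft_frame);		/* 40 */
  return 0;
}
#endif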
21051 /* Given FROM and TO register numbers, say whether this elimination is
21052 allowed. Frame pointer elimination is automatically handled.
21054 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21055 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21056 pointer, we must eliminate FRAME_POINTER_REGNUM into
21057 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21058 ARG_POINTER_REGNUM. */
21061 arm_can_eliminate (const int from
, const int to
)
21063 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21064 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21065 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21066 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21070 /* Emit RTL to save coprocessor registers on function entry. Returns the
21071 number of bytes pushed. */
21074 arm_save_coproc_regs(void)
21076 int saved_size
= 0;
21078 unsigned start_reg
;
21081 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21082 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21084 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21085 insn
= gen_rtx_MEM (V2SImode
, insn
);
21086 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21087 RTX_FRAME_RELATED_P (insn
) = 1;
21091 if (TARGET_HARD_FLOAT
)
21093 start_reg
= FIRST_VFP_REGNUM
;
21095 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21097 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21098 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21100 if (start_reg
!= reg
)
21101 saved_size
+= vfp_emit_fstmd (start_reg
,
21102 (reg
- start_reg
) / 2);
21103 start_reg
= reg
+ 2;
21106 if (start_reg
!= reg
)
21107 saved_size
+= vfp_emit_fstmd (start_reg
,
21108 (reg
- start_reg
) / 2);
21114 /* Set the Thumb frame pointer from the stack pointer. */
21117 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21119 HOST_WIDE_INT amount
;
21122 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21124 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21125 stack_pointer_rtx
, GEN_INT (amount
)));
21128 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21129 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21130 expects the first two operands to be the same. */
21133 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21135 hard_frame_pointer_rtx
));
21139 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21140 hard_frame_pointer_rtx
,
21141 stack_pointer_rtx
));
21143 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21144 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21145 RTX_FRAME_RELATED_P (dwarf
) = 1;
21146 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21149 RTX_FRAME_RELATED_P (insn
) = 1;
21152 struct scratch_reg
{
21157 /* Return a short-lived scratch register for use as a 2nd scratch register on
21158 function entry after the registers are saved in the prologue. This register
21159 must be released by means of release_scratch_register_on_entry. IP is not
21160 considered since it is always used as the 1st scratch register if available.
21162 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21163 mask of live registers. */
21166 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21167 unsigned long live_regs
)
21173 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21179 for (i
= 4; i
< 11; i
++)
21180 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21188 /* If IP is used as the 1st scratch register for a nested function,
21189 then either r3 wasn't available or is used to preserve IP. */
21190 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21192 regno
= (regno1
== 3 ? 2 : 3);
21194 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21199 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21202 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21203 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21204 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21205 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21206 RTX_FRAME_RELATED_P (insn
) = 1;
21207 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
      rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
      rtx x = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, 4));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  REGNO1
   is the index number of the 1st scratch register and LIVE_REGS is the
   mask of live registers.  */
21239 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21240 unsigned int regno1
, unsigned long live_regs
)
21242 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21244 /* See if we have a constant small number of probes to generate. If so,
21245 that's the easy case. */
21246 if (size
<= PROBE_INTERVAL
)
21248 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21249 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21250 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21253 /* The run-time loop is made up of 10 insns in the generic case while the
21254 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
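/* Illustrative arithmetic only (values not taken from any target): for
   n = 3 intervals the unrolled sequence costs 4 + 2*(3-2) = 6 insns, and
   for n = 5 it costs 4 + 2*(5-2) = 10 insns, which matches the 10-insn
   run-time loop -- hence the 5 * PROBE_INTERVAL cut-off used below.  */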
21255 else if (size
<= 5 * PROBE_INTERVAL
)
21257 HOST_WIDE_INT i
, rem
;
21259 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21260 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21261 emit_stack_probe (reg1
);
21263 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21264 it exceeds SIZE. If only two probes are needed, this will not
21265 generate any code. Then probe at FIRST + SIZE. */
21266 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21268 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21269 emit_stack_probe (reg1
);
21272 rem
= size
- (i
- PROBE_INTERVAL
);
21273 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21275 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21276 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21279 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21282 /* Otherwise, do the same as above, but in a loop. Note that we must be
21283 extra careful with variables wrapping around because we might be at
21284 the very top (or the very bottom) of the address space and we have
21285 to be able to handle this case properly; in particular, we use an
21286 equality test for the loop condition. */
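/* Worked example of the rounding performed below, assuming PROBE_INTERVAL
   is 4096 (i.e. STACK_CHECK_PROBE_INTERVAL_EXP == 12): for size == 10000,
   rounded_size = 10000 & -4096 = 8192, so the loop probes at FIRST + 4096
   and FIRST + 8192, and the remaining 1808 bytes are covered by the final
   probe at FIRST + SIZE emitted in step 4.  */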
21289 HOST_WIDE_INT rounded_size
;
21290 struct scratch_reg sr
;
21292 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21294 emit_move_insn (reg1
, GEN_INT (first
));
21297 /* Step 1: round SIZE to the previous multiple of the interval. */
21299 rounded_size
= size
& -PROBE_INTERVAL
;
21300 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21303 /* Step 2: compute initial and final value of the loop counter. */
21305 /* TEST_ADDR = SP + FIRST. */
21306 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21308 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21309 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21312 /* Step 3: the loop
21316 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21319 while (TEST_ADDR != LAST_ADDR)
21321 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21322 until it is equal to ROUNDED_SIZE. */
21324 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21327 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21328 that SIZE is equal to ROUNDED_SIZE. */
21330 if (size
!= rounded_size
)
21332 HOST_WIDE_INT rem
= size
- rounded_size
;
21334 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21336 emit_set_insn (sr
.reg
,
21337 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21338 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21339 PROBE_INTERVAL
- rem
));
21342 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21345 release_scratch_register_on_entry (&sr
);
21348 /* Make sure nothing is scheduled before we are done. */
21349 emit_insn (gen_blockage ());
21352 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21353 absolute addresses. */
21356 output_probe_stack_range (rtx reg1
, rtx reg2
)
21358 static int labelno
= 0;
21362 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21365 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21367 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21369 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21370 output_asm_insn ("sub\t%0, %0, %1", xops
);
21372 /* Probe at TEST_ADDR. */
21373 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21375 /* Test if TEST_ADDR == LAST_ADDR. */
21377 output_asm_insn ("cmp\t%0, %1", xops
);
21380 fputs ("\tbne\t", asm_out_file
);
21381 assemble_name_raw (asm_out_file
, loop_lab
);
21382 fputc ('\n', asm_out_file
);
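/* For illustration only (register numbers and the probe interval are
   chosen for the example, not emitted verbatim), the loop printed above
   has the shape:

	.LPSRL0:
		sub	r4, r4, #4096	@ TEST_ADDR -= PROBE_INTERVAL
		str	r0, [r4, #0]	@ probe at TEST_ADDR
		cmp	r4, r5		@ reached LAST_ADDR yet?
		bne	.LPSRL0

   assuming PROBE_INTERVAL is 4096 and r4/r5 hold TEST_ADDR/LAST_ADDR.  */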
21387 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21390 arm_expand_prologue (void)
21395 unsigned long live_regs_mask
;
21396 unsigned long func_type
;
21398 int saved_pretend_args
= 0;
21399 int saved_regs
= 0;
21400 unsigned HOST_WIDE_INT args_to_push
;
21401 HOST_WIDE_INT size
;
21402 arm_stack_offsets
*offsets
;
21405 func_type
= arm_current_func_type ();
21407 /* Naked functions don't have prologues. */
21408 if (IS_NAKED (func_type
))
21410 if (flag_stack_usage_info
)
21411 current_function_static_stack_size
= 0;
21415 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21416 args_to_push
= crtl
->args
.pretend_args_size
;
21418 /* Compute which register we will have to save onto the stack. */
21419 offsets
= arm_get_frame_offsets ();
21420 live_regs_mask
= offsets
->saved_regs_mask
;
21422 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21424 if (IS_STACKALIGN (func_type
))
21428 /* Handle a word-aligned stack pointer. We generate the following:
21433 <save and restore r0 in normal prologue/epilogue>
21437 The unwinder doesn't need to know about the stack realignment.
21438 Just tell it we saved SP in r0. */
21439 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21441 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21442 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21444 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21445 RTX_FRAME_RELATED_P (insn
) = 1;
21446 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21448 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21450 /* ??? The CFA changes here, which may cause GDB to conclude that it
21451 has entered a different function. That said, the unwind info is
21452 correct, individually, before and after this instruction because
21453 we've described the save of SP, which will override the default
21454 handling of SP as restoring from the CFA. */
21455 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
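      /* An illustrative expansion of the realignment above (r0 and r1 are
	 fixed by the code: r0 keeps the old SP, r1 holds the realigned
	 value):

		mov	r0, sp
		bic	r1, r0, #7
		mov	sp, r1

	 so the stack pointer is rounded down to an 8-byte boundary while
	 the original value survives in r0 for the epilogue to restore.  */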
21458 /* The static chain register is the same as the IP register. If it is
21459 clobbered when creating the frame, we need to save and restore it. */
21460 clobber_ip
= IS_NESTED (func_type
)
21461 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21462 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21463 && !df_regs_ever_live_p (LR_REGNUM
)
21464 && arm_r3_live_at_start_p ()));
21466 /* Find somewhere to store IP whilst the frame is being created.
21467 We try the following places in order:
21469 1. The last argument register r3 if it is available.
21470 2. A slot on the stack above the frame if there are no
21471 arguments to push onto the stack.
21472 3. Register r3 again, after pushing the argument registers
21473 onto the stack, if this is a varargs function.
21474 4. The last slot on the stack created for the arguments to
21475 push, if this isn't a varargs function.
21477 Note - we only need to tell the dwarf2 backend about the SP
21478 adjustment in the second variant; the static chain register
21479 doesn't need to be unwound, as it doesn't contain a value
21480 inherited from the caller. */
21483 if (!arm_r3_live_at_start_p ())
21484 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21485 else if (args_to_push
== 0)
21489 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21492 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21493 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21496 /* Just tell the dwarf backend that we adjusted SP. */
21497 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21498 plus_constant (Pmode
, stack_pointer_rtx
,
21500 RTX_FRAME_RELATED_P (insn
) = 1;
21501 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21505 /* Store the args on the stack. */
21506 if (cfun
->machine
->uses_anonymous_args
)
21508 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21509 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21510 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21511 saved_pretend_args
= 1;
21517 if (args_to_push
== 4)
21518 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21520 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21521 plus_constant (Pmode
,
21525 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21527 /* Just tell the dwarf backend that we adjusted SP. */
21528 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21529 plus_constant (Pmode
, stack_pointer_rtx
,
21531 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21534 RTX_FRAME_RELATED_P (insn
) = 1;
21535 fp_offset
= args_to_push
;
21540 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21542 if (IS_INTERRUPT (func_type
))
21544 /* Interrupt functions must not corrupt any registers.
21545 Creating a frame pointer however, corrupts the IP
21546 register, so we must push it first. */
21547 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21549 /* Do not set RTX_FRAME_RELATED_P on this insn.
21550 The dwarf stack unwinding code only wants to see one
21551 stack decrement per function, and this is not it. If
21552 this instruction is labeled as being part of the frame
21553 creation sequence then dwarf2out_frame_debug_expr will
21554 die when it encounters the assignment of IP to FP
21555 later on, since the use of SP here establishes SP as
21556 the CFA register and not IP.
21558 Anyway this instruction is not really part of the stack
21559 frame creation although it is part of the prologue. */
21562 insn
= emit_set_insn (ip_rtx
,
21563 plus_constant (Pmode
, stack_pointer_rtx
,
21565 RTX_FRAME_RELATED_P (insn
) = 1;
21570 /* Push the argument registers, or reserve space for them. */
21571 if (cfun
->machine
->uses_anonymous_args
)
21572 insn
= emit_multi_reg_push
21573 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21574 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21577 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21578 GEN_INT (- args_to_push
)));
21579 RTX_FRAME_RELATED_P (insn
) = 1;
21582 /* If this is an interrupt service routine, and the link register
21583 is going to be pushed, and we're not generating extra
     push of IP (needed when a frame is needed and the frame layout is APCS),
21585 subtracting four from LR now will mean that the function return
21586 can be done with a single instruction. */
21587 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21588 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21589 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21592 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21594 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21597 if (live_regs_mask
)
21599 unsigned long dwarf_regs_mask
= live_regs_mask
;
21601 saved_regs
+= bit_count (live_regs_mask
) * 4;
21602 if (optimize_size
&& !frame_pointer_needed
21603 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21605 /* If no coprocessor registers are being pushed and we don't have
21606 to worry about a frame pointer then push extra registers to
21607 create the stack frame. This is done in a way that does not
21608 alter the frame layout, so is independent of the epilogue. */
21612 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21614 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21615 if (frame
&& n
* 4 >= frame
)
21618 live_regs_mask
|= (1 << n
) - 1;
21619 saved_regs
+= frame
;
21624 && current_tune
->prefer_ldrd_strd
21625 && !optimize_function_for_size_p (cfun
))
21627 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21629 thumb2_emit_strd_push (live_regs_mask
);
21630 else if (TARGET_ARM
21631 && !TARGET_APCS_FRAME
21632 && !IS_INTERRUPT (func_type
))
21633 arm_emit_strd_push (live_regs_mask
);
21636 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21637 RTX_FRAME_RELATED_P (insn
) = 1;
21642 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21643 RTX_FRAME_RELATED_P (insn
) = 1;
21647 if (! IS_VOLATILE (func_type
))
21648 saved_regs
+= arm_save_coproc_regs ();
21650 if (frame_pointer_needed
&& TARGET_ARM
)
21652 /* Create the new frame pointer. */
21653 if (TARGET_APCS_FRAME
)
21655 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21656 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21657 RTX_FRAME_RELATED_P (insn
) = 1;
21661 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21662 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21663 stack_pointer_rtx
, insn
));
21664 RTX_FRAME_RELATED_P (insn
) = 1;
21668 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21669 if (flag_stack_usage_info
)
21670 current_function_static_stack_size
= size
;
21672 /* If this isn't an interrupt service routine and we have a frame, then do
21673 stack checking. We use IP as the first scratch register, except for the
21674 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21675 if (!IS_INTERRUPT (func_type
)
21676 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
21678 unsigned int regno
;
21680 if (!IS_NESTED (func_type
) || clobber_ip
)
21682 else if (df_regs_ever_live_p (LR_REGNUM
))
21687 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21689 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
21690 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
21691 size
- STACK_CHECK_PROTECT
,
21692 regno
, live_regs_mask
);
21695 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
21696 regno
, live_regs_mask
);
21699 /* Recover the static chain register. */
21702 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21703 insn
= gen_rtx_REG (SImode
, 3);
21706 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21707 insn
= gen_frame_mem (SImode
, insn
);
21709 emit_set_insn (ip_rtx
, insn
);
21710 emit_insn (gen_force_register_use (ip_rtx
));
21713 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21715 /* This add can produce multiple insns for a large constant, so we
21716 need to get tricky. */
21717 rtx_insn
*last
= get_last_insn ();
21719 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21720 - offsets
->outgoing_args
);
21722 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21726 last
= last
? NEXT_INSN (last
) : get_insns ();
21727 RTX_FRAME_RELATED_P (last
) = 1;
21729 while (last
!= insn
);
21731 /* If the frame pointer is needed, emit a special barrier that
21732 will prevent the scheduler from moving stores to the frame
21733 before the stack adjustment. */
21734 if (frame_pointer_needed
)
21735 emit_insn (gen_stack_tie (stack_pointer_rtx
,
21736 hard_frame_pointer_rtx
));
21740 if (frame_pointer_needed
&& TARGET_THUMB2
)
21741 thumb_set_frame_pointer (offsets
);
21743 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21745 unsigned long mask
;
21747 mask
= live_regs_mask
;
21748 mask
&= THUMB2_WORK_REGS
;
21749 if (!IS_NESTED (func_type
))
21750 mask
|= (1 << IP_REGNUM
);
21751 arm_load_pic_register (mask
);
21754 /* If we are profiling, make sure no instructions are scheduled before
21755 the call to mcount. Similarly if the user has requested no
21756 scheduling in the prolog. Similarly if we want non-call exceptions
21757 using the EABI unwinder, to prevent faulting instructions from being
21758 swapped with a stack adjustment. */
21759 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21760 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21761 && cfun
->can_throw_non_call_exceptions
))
21762 emit_insn (gen_blockage ());
21764 /* If the link register is being kept alive, with the return address in it,
21765 then make sure that it does not get reused by the ce2 pass. */
21766 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21767 cfun
->machine
->lr_save_eliminated
= 1;
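/* As a rough illustration of what the expansion above can produce for a
   simple ARM-state function with an APCS frame (exact register list and
   offsets depend on the frame layout; this is not emitted verbatim):

	mov	ip, sp
	push	{fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals>

   i.e. save the incoming SP in IP, push the frame chain, derive FP from
   IP, then allocate the local/outgoing area.  */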
21770 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21772 arm_print_condition (FILE *stream
)
21774 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21776 /* Branch conversion is not implemented for Thumb-2. */
21779 output_operand_lossage ("predicated Thumb instruction");
21782 if (current_insn_predicate
!= NULL
)
21784 output_operand_lossage
21785 ("predicated instruction in conditional sequence");
21789 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21791 else if (current_insn_predicate
)
21793 enum arm_cond_code code
;
21797 output_operand_lossage ("predicated Thumb instruction");
21801 code
= get_arm_condition_code (current_insn_predicate
);
21802 fputs (arm_condition_codes
[code
], stream
);
/* Globally reserved letters: acln
   Punctuation letters currently used: @_|?().!#
   Lower case letters currently used: bcdefhimpqtvwxyz
   Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
   Letters previously used, but now deprecated/obsolete: sVWXYZ.
21813 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21815 If CODE is 'd', then the X is a condition operand and the instruction
21816 should only be executed if the condition is true.
21817 if CODE is 'D', then the X is a condition operand and the instruction
21818 should only be executed if the condition is false: however, if the mode
21819 of the comparison is CCFPEmode, then always execute the instruction -- we
21820 do this because in these circumstances !GE does not necessarily imply LT;
21821 in these cases the instruction pattern will take care to make sure that
21822 an instruction containing %d will follow, thereby undoing the effects of
21823 doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
21826 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21827 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
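/* A couple of illustrative uses of the codes above (operands invented for
   the example, not taken from any particular pattern):

     %B applied to (const_int 5)		prints  -6	(~5, sign-extended)
     %M applied to a DImode value in r0		prints  {r0-r1}

   which is how inverted immediates and ldm/stm register lists end up in
   the assembly output.  */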
21829 arm_print_operand (FILE *stream
, rtx x
, int code
)
21834 fputs (ASM_COMMENT_START
, stream
);
21838 fputs (user_label_prefix
, stream
);
21842 fputs (REGISTER_PREFIX
, stream
);
21846 arm_print_condition (stream
);
21850 /* The current condition code for a condition code setting instruction.
21851 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21852 fputc('s', stream
);
21853 arm_print_condition (stream
);
21857 /* If the instruction is conditionally executed then print
21858 the current condition code, otherwise print 's'. */
21859 gcc_assert (TARGET_THUMB2
);
21860 if (current_insn_predicate
)
21861 arm_print_condition (stream
);
21863 fputc('s', stream
);
21866 /* %# is a "break" sequence. It doesn't output anything, but is used to
21867 separate e.g. operand numbers from following text, if that text consists
21868 of further digits which we don't want to be part of the operand
21876 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
21877 fprintf (stream
, "%s", fp_const_from_val (&r
));
21881 /* An integer or symbol address without a preceding # sign. */
21883 switch (GET_CODE (x
))
21886 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21890 output_addr_const (stream
, x
);
21894 if (GET_CODE (XEXP (x
, 0)) == PLUS
21895 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21897 output_addr_const (stream
, x
);
21900 /* Fall through. */
21903 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21907 /* An integer that we want to print in HEX. */
21909 switch (GET_CODE (x
))
21912 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21916 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21921 if (CONST_INT_P (x
))
21924 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21925 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21929 putc ('~', stream
);
21930 output_addr_const (stream
, x
);
21935 /* Print the log2 of a CONST_INT. */
21939 if (!CONST_INT_P (x
)
21940 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21941 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21943 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21948 /* The low 16 bits of an immediate constant. */
21949 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21953 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21957 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21965 shift
= shift_op (x
, &val
);
21969 fprintf (stream
, ", %s ", shift
);
21971 arm_print_operand (stream
, XEXP (x
, 1), 0);
21973 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21978 /* An explanation of the 'Q', 'R' and 'H' register operands:
21980 In a pair of registers containing a DI or DF value the 'Q'
21981 operand returns the register number of the register containing
21982 the least significant part of the value. The 'R' operand returns
21983 the register number of the register containing the most
21984 significant part of the value.
21986 The 'H' operand returns the higher of the two register numbers.
21987 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21988 same as the 'Q' operand, since the most significant part of the
21989 value is held in the lower number register. The reverse is true
21990 on systems where WORDS_BIG_ENDIAN is false.
21992 The purpose of these operands is to distinguish between cases
21993 where the endian-ness of the values is important (for example
21994 when they are added together), and cases where the endian-ness
21995 is irrelevant, but the order of register operations is important.
21996 For example when loading a value from memory into a register
21997 pair, the endian-ness does not matter. Provided that the value
21998 from the lower memory address is put into the lower numbered
21999 register, and the value from the higher address is put into the
22000 higher numbered register, the load will work regardless of whether
22001 the value being loaded is big-wordian or little-wordian. The
22002 order of the two register loads can matter however, if the address
22003 of the memory location is actually held in one of the registers
22004 being overwritten by the load.
       The 'Q' and 'R' constraints are also available for 64-bit constants.  */
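    /* For instance (little-endian, WORDS_BIG_ENDIAN false), a DImode value
       held in the pair r2/r3 prints as:

	 %Q -> r2	(least significant half)
	 %R -> r3	(most significant half)
	 %H -> r3	(the higher-numbered register of the pair)

       On a WORDS_BIG_ENDIAN target %Q and %R swap, while %H still names
       r3.  */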
22009 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22011 rtx part
= gen_lowpart (SImode
, x
);
22012 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22016 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22018 output_operand_lossage ("invalid operand for code '%c'", code
);
22022 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
22026 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22028 machine_mode mode
= GET_MODE (x
);
22031 if (mode
== VOIDmode
)
22033 part
= gen_highpart_mode (SImode
, mode
, x
);
22034 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22038 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22040 output_operand_lossage ("invalid operand for code '%c'", code
);
22044 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
22048 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22050 output_operand_lossage ("invalid operand for code '%c'", code
);
22054 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22058 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22060 output_operand_lossage ("invalid operand for code '%c'", code
);
22064 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22068 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22070 output_operand_lossage ("invalid operand for code '%c'", code
);
22074 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
22078 asm_fprintf (stream
, "%r",
22079 REG_P (XEXP (x
, 0))
22080 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
22084 asm_fprintf (stream
, "{%r-%r}",
22086 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22089 /* Like 'M', but writing doubleword vector registers, for use by Neon
22093 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22094 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22096 asm_fprintf (stream
, "{d%d}", regno
);
22098 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
22103 /* CONST_TRUE_RTX means always -- that's the default. */
22104 if (x
== const_true_rtx
)
22107 if (!COMPARISON_P (x
))
22109 output_operand_lossage ("invalid operand for code '%c'", code
);
22113 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22118 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22119 want to do that. */
22120 if (x
== const_true_rtx
)
22122 output_operand_lossage ("instruction never executed");
22125 if (!COMPARISON_P (x
))
22127 output_operand_lossage ("invalid operand for code '%c'", code
);
22131 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22132 (get_arm_condition_code (x
))],
22142 /* Former Maverick support, removed after GCC-4.7. */
22143 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22148 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22149 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22150 /* Bad value for wCG register number. */
22152 output_operand_lossage ("invalid operand for code '%c'", code
);
22157 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22160 /* Print an iWMMXt control register name. */
22162 if (!CONST_INT_P (x
)
22164 || INTVAL (x
) >= 16)
22165 /* Bad value for wC register number. */
22167 output_operand_lossage ("invalid operand for code '%c'", code
);
22173 static const char * wc_reg_names
[16] =
22175 "wCID", "wCon", "wCSSF", "wCASF",
22176 "wC4", "wC5", "wC6", "wC7",
22177 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22178 "wC12", "wC13", "wC14", "wC15"
22181 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22185 /* Print the high single-precision register of a VFP double-precision
22189 machine_mode mode
= GET_MODE (x
);
22192 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22194 output_operand_lossage ("invalid operand for code '%c'", code
);
22199 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22201 output_operand_lossage ("invalid operand for code '%c'", code
);
22205 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22209 /* Print a VFP/Neon double precision or quad precision register name. */
22213 machine_mode mode
= GET_MODE (x
);
22214 int is_quad
= (code
== 'q');
22217 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22219 output_operand_lossage ("invalid operand for code '%c'", code
);
22224 || !IS_VFP_REGNUM (REGNO (x
)))
22226 output_operand_lossage ("invalid operand for code '%c'", code
);
22231 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22232 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22234 output_operand_lossage ("invalid operand for code '%c'", code
);
22238 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22239 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22243 /* These two codes print the low/high doubleword register of a Neon quad
22244 register, respectively. For pair-structure types, can also print
22245 low/high quadword registers. */
22249 machine_mode mode
= GET_MODE (x
);
22252 if ((GET_MODE_SIZE (mode
) != 16
22253 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22255 output_operand_lossage ("invalid operand for code '%c'", code
);
22260 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22262 output_operand_lossage ("invalid operand for code '%c'", code
);
22266 if (GET_MODE_SIZE (mode
) == 16)
22267 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22268 + (code
== 'f' ? 1 : 0));
22270 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22271 + (code
== 'f' ? 1 : 0));
22275 /* Print a VFPv3 floating-point constant, represented as an integer
22279 int index
= vfp3_const_double_index (x
);
22280 gcc_assert (index
!= -1);
22281 fprintf (stream
, "%d", index
);
22285 /* Print bits representing opcode features for Neon.
22287 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22288 and polynomials as unsigned.
22290 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22292 Bit 2 is 1 for rounding functions, 0 otherwise. */
22294 /* Identify the type as 's', 'u', 'p' or 'f'. */
22297 HOST_WIDE_INT bits
= INTVAL (x
);
22298 fputc ("uspf"[bits
& 3], stream
);
22302 /* Likewise, but signed and unsigned integers are both 'i'. */
22305 HOST_WIDE_INT bits
= INTVAL (x
);
22306 fputc ("iipf"[bits
& 3], stream
);
22310 /* As for 'T', but emit 'u' instead of 'p'. */
22313 HOST_WIDE_INT bits
= INTVAL (x
);
22314 fputc ("usuf"[bits
& 3], stream
);
22318 /* Bit 2: rounding (vs none). */
22321 HOST_WIDE_INT bits
= INTVAL (x
);
22322 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22326 /* Memory operand for vld1/vst1 instruction. */
22330 bool postinc
= FALSE
;
22331 rtx postinc_reg
= NULL
;
22332 unsigned align
, memsize
, align_bits
;
22334 gcc_assert (MEM_P (x
));
22335 addr
= XEXP (x
, 0);
22336 if (GET_CODE (addr
) == POST_INC
)
22339 addr
= XEXP (addr
, 0);
22341 if (GET_CODE (addr
) == POST_MODIFY
)
22343 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22344 addr
= XEXP (addr
, 0);
22346 asm_fprintf (stream
, "[%r", REGNO (addr
));
22348 /* We know the alignment of this access, so we can emit a hint in the
22349 instruction (for some alignments) as an aid to the memory subsystem
22351 align
= MEM_ALIGN (x
) >> 3;
22352 memsize
= MEM_SIZE (x
);
22354 /* Only certain alignment specifiers are supported by the hardware. */
22355 if (memsize
== 32 && (align
% 32) == 0)
22357 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22359 else if (memsize
>= 8 && (align
% 8) == 0)
22364 if (align_bits
!= 0)
22365 asm_fprintf (stream
, ":%d", align_bits
);
22367 asm_fprintf (stream
, "]");
22370 fputs("!", stream
);
22372 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
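	/* Illustrative output (register numbers depend on the operands): a
	   16-byte vld1/vst1 access whose MEM_ALIGN is 128 bits prints its
	   address operand as "[r0:128]", and a post-modify by a register
	   then appends ", r1", giving "[r0:128], r1".  */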
22380 gcc_assert (MEM_P (x
));
22381 addr
= XEXP (x
, 0);
22382 gcc_assert (REG_P (addr
));
22383 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22387 /* Translate an S register number into a D register number and element index. */
22390 machine_mode mode
= GET_MODE (x
);
22393 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22395 output_operand_lossage ("invalid operand for code '%c'", code
);
22400 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22402 output_operand_lossage ("invalid operand for code '%c'", code
);
22406 regno
= regno
- FIRST_VFP_REGNUM
;
22407 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22412 gcc_assert (CONST_DOUBLE_P (x
));
22414 result
= vfp3_const_double_for_fract_bits (x
);
22416 result
= vfp3_const_double_for_bits (x
);
22417 fprintf (stream
, "#%d", result
);
22420 /* Register specifier for vld1.16/vst1.16. Translate the S register
22421 number into a D register number and element index. */
22424 machine_mode mode
= GET_MODE (x
);
22427 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22429 output_operand_lossage ("invalid operand for code '%c'", code
);
22434 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22436 output_operand_lossage ("invalid operand for code '%c'", code
);
22440 regno
= regno
- FIRST_VFP_REGNUM
;
22441 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22448 output_operand_lossage ("missing operand");
22452 switch (GET_CODE (x
))
22455 asm_fprintf (stream
, "%r", REGNO (x
));
22459 output_address (GET_MODE (x
), XEXP (x
, 0));
22465 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22466 sizeof (fpstr
), 0, 1);
22467 fprintf (stream
, "#%s", fpstr
);
22472 gcc_assert (GET_CODE (x
) != NEG
);
22473 fputc ('#', stream
);
22474 if (GET_CODE (x
) == HIGH
)
22476 fputs (":lower16:", stream
);
22480 output_addr_const (stream
, x
);
22486 /* Target hook for printing a memory address. */
22488 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22492 int is_minus
= GET_CODE (x
) == MINUS
;
22495 asm_fprintf (stream
, "[%r]", REGNO (x
));
22496 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22498 rtx base
= XEXP (x
, 0);
22499 rtx index
= XEXP (x
, 1);
22500 HOST_WIDE_INT offset
= 0;
22502 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22504 /* Ensure that BASE is a register. */
22505 /* (one of them must be). */
	  /* Also ensure the SP is not used as an index register.  */
22507 std::swap (base
, index
);
22509 switch (GET_CODE (index
))
22512 offset
= INTVAL (index
);
22515 asm_fprintf (stream
, "[%r, #%wd]",
22516 REGNO (base
), offset
);
22520 asm_fprintf (stream
, "[%r, %s%r]",
22521 REGNO (base
), is_minus
? "-" : "",
22531 asm_fprintf (stream
, "[%r, %s%r",
22532 REGNO (base
), is_minus
? "-" : "",
22533 REGNO (XEXP (index
, 0)));
22534 arm_print_operand (stream
, index
, 'S');
22535 fputs ("]", stream
);
22540 gcc_unreachable ();
22543 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22544 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22546 gcc_assert (REG_P (XEXP (x
, 0)));
22548 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22549 asm_fprintf (stream
, "[%r, #%s%d]!",
22550 REGNO (XEXP (x
, 0)),
22551 GET_CODE (x
) == PRE_DEC
? "-" : "",
22552 GET_MODE_SIZE (mode
));
22554 asm_fprintf (stream
, "[%r], #%s%d",
22555 REGNO (XEXP (x
, 0)),
22556 GET_CODE (x
) == POST_DEC
? "-" : "",
22557 GET_MODE_SIZE (mode
));
22559 else if (GET_CODE (x
) == PRE_MODIFY
)
22561 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22562 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22563 asm_fprintf (stream
, "#%wd]!",
22564 INTVAL (XEXP (XEXP (x
, 1), 1)));
22566 asm_fprintf (stream
, "%r]!",
22567 REGNO (XEXP (XEXP (x
, 1), 1)));
22569 else if (GET_CODE (x
) == POST_MODIFY
)
22571 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22572 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22573 asm_fprintf (stream
, "#%wd",
22574 INTVAL (XEXP (XEXP (x
, 1), 1)));
22576 asm_fprintf (stream
, "%r",
22577 REGNO (XEXP (XEXP (x
, 1), 1)));
22579 else output_addr_const (stream
, x
);
22584 asm_fprintf (stream
, "[%r]", REGNO (x
));
22585 else if (GET_CODE (x
) == POST_INC
)
22586 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22587 else if (GET_CODE (x
) == PLUS
)
22589 gcc_assert (REG_P (XEXP (x
, 0)));
22590 if (CONST_INT_P (XEXP (x
, 1)))
22591 asm_fprintf (stream
, "[%r, #%wd]",
22592 REGNO (XEXP (x
, 0)),
22593 INTVAL (XEXP (x
, 1)));
22595 asm_fprintf (stream
, "[%r, %r]",
22596 REGNO (XEXP (x
, 0)),
22597 REGNO (XEXP (x
, 1)));
22600 output_addr_const (stream
, x
);
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */

static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
	  || code == '(' || code == ')' || code == '#'
	  || (TARGET_32BIT && (code == '?'))
	  || (TARGET_THUMB2 && (code == '!'))
	  || (TARGET_THUMB && (code == '_')));
}
22616 /* Target hook for assembling integer objects. The ARM version needs to
22617 handle word-sized values specially. */
22619 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22623 if (size
== UNITS_PER_WORD
&& aligned_p
)
22625 fputs ("\t.word\t", asm_out_file
);
22626 output_addr_const (asm_out_file
, x
);
22628 /* Mark symbols as position independent. We only do this in the
22629 .text segment, not in the .data segment. */
22630 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22631 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22633 /* See legitimize_pic_address for an explanation of the
22634 TARGET_VXWORKS_RTP check. */
22635 /* References to weak symbols cannot be resolved locally:
22636 they may be overridden by a non-weak definition at link
22638 if (!arm_pic_data_is_text_relative
22639 || (GET_CODE (x
) == SYMBOL_REF
22640 && (!SYMBOL_REF_LOCAL_P (x
)
22641 || (SYMBOL_REF_DECL (x
)
22642 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0))))
22643 fputs ("(GOT)", asm_out_file
);
22645 fputs ("(GOTOFF)", asm_out_file
);
22647 fputc ('\n', asm_out_file
);
22651 mode
= GET_MODE (x
);
22653 if (arm_vector_mode_supported_p (mode
))
22657 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22659 units
= CONST_VECTOR_NUNITS (x
);
22660 size
= GET_MODE_UNIT_SIZE (mode
);
22662 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22663 for (i
= 0; i
< units
; i
++)
22665 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22667 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22670 for (i
= 0; i
< units
; i
++)
22672 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22674 (*CONST_DOUBLE_REAL_VALUE (elt
),
22675 as_a
<scalar_float_mode
> (GET_MODE_INNER (mode
)),
22676 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22682 return default_assemble_integer (x
, size
, aligned_p
);
22686 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22690 if (!TARGET_AAPCS_BASED
)
22693 default_named_section_asm_out_constructor
22694 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22698 /* Put these in the .init_array section, using a special relocation. */
22699 if (priority
!= DEFAULT_INIT_PRIORITY
)
22702 sprintf (buf
, "%s.%.5u",
22703 is_ctor
? ".init_array" : ".fini_array",
22705 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL_TREE
);
22712 switch_to_section (s
);
22713 assemble_align (POINTER_SIZE
);
22714 fputs ("\t.word\t", asm_out_file
);
22715 output_addr_const (asm_out_file
, symbol
);
22716 fputs ("(target1)\n", asm_out_file
);
22719 /* Add a function to the list of static constructors. */
22722 arm_elf_asm_constructor (rtx symbol
, int priority
)
22724 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22727 /* Add a function to the list of static destructors. */
22730 arm_elf_asm_destructor (rtx symbol
, int priority
)
22732 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22735 /* A finite state machine takes care of noticing whether or not instructions
22736 can be conditionally executed, and thus decrease execution time and code
22737 size by deleting branch instructions. The fsm is controlled by
22738 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22740 /* The state of the fsm controlling condition codes are:
22741 0: normal, do nothing special
22742 1: make ASM_OUTPUT_OPCODE not output this instruction
22743 2: make ASM_OUTPUT_OPCODE not output this instruction
22744 3: make instructions conditional
22745 4: make instructions conditional
22747 State transitions (state->state by whom under condition):
22748 0 -> 1 final_prescan_insn if the `target' is a label
22749 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22750 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22751 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22752 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22753 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22754 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22755 (the target insn is arm_target_insn).
22757 If the jump clobbers the conditions then we use states 2 and 4.
22759 A similar thing can be done with conditional return insns.
22761 XXX In case the `target' is an unconditional branch, this conditionalising
22762 of the instructions always reduces code size, but not always execution
22763 time. But then, I want to reduce the code size to somewhere near what
22764 /bin/cc produces. */
22766 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22767 instructions. When a COND_EXEC instruction is seen the subsequent
22768 instructions are scanned so that multiple conditional instructions can be
22769 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22770 specify the length and true/false mask for the IT block. These will be
22771 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
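/* An illustrative before/after for the ARM-state fsm described above
   (register names invented for the example):

	cmp	r0, #0			cmp	r0, #0
	beq	.L1		==>	addne	r1, r1, #1
	add	r1, r1, #1
   .L1:

   On Thumb-2 the same situation is instead expressed as an IT block:

	cmp	r0, #0
	it	ne
	addne	r1, r1, #1  */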
22773 /* Returns the index of the ARM condition code string in
22774 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22775 COMPARISON should be an rtx like `(eq (...) (...))'. */
22778 maybe_get_arm_condition_code (rtx comparison
)
22780 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22781 enum arm_cond_code code
;
22782 enum rtx_code comp_code
= GET_CODE (comparison
);
22784 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22785 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22786 XEXP (comparison
, 1));
22790 case E_CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22791 case E_CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22792 case E_CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22793 case E_CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22794 case E_CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22795 case E_CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22796 case E_CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22797 case E_CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22798 case E_CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22799 case E_CC_DLTUmode
: code
= ARM_CC
;
22802 if (comp_code
== EQ
)
22803 return ARM_INVERSE_CONDITION_CODE (code
);
22804 if (comp_code
== NE
)
22808 case E_CC_NOOVmode
:
22811 case NE
: return ARM_NE
;
22812 case EQ
: return ARM_EQ
;
22813 case GE
: return ARM_PL
;
22814 case LT
: return ARM_MI
;
22815 default: return ARM_NV
;
22821 case NE
: return ARM_NE
;
22822 case EQ
: return ARM_EQ
;
22823 default: return ARM_NV
;
22829 case NE
: return ARM_MI
;
22830 case EQ
: return ARM_PL
;
22831 default: return ARM_NV
;
22836 /* We can handle all cases except UNEQ and LTGT. */
22839 case GE
: return ARM_GE
;
22840 case GT
: return ARM_GT
;
22841 case LE
: return ARM_LS
;
22842 case LT
: return ARM_MI
;
22843 case NE
: return ARM_NE
;
22844 case EQ
: return ARM_EQ
;
22845 case ORDERED
: return ARM_VC
;
22846 case UNORDERED
: return ARM_VS
;
22847 case UNLT
: return ARM_LT
;
22848 case UNLE
: return ARM_LE
;
22849 case UNGT
: return ARM_HI
;
22850 case UNGE
: return ARM_PL
;
22851 /* UNEQ and LTGT do not have a representation. */
22852 case UNEQ
: /* Fall through. */
22853 case LTGT
: /* Fall through. */
22854 default: return ARM_NV
;
22860 case NE
: return ARM_NE
;
22861 case EQ
: return ARM_EQ
;
22862 case GE
: return ARM_LE
;
22863 case GT
: return ARM_LT
;
22864 case LE
: return ARM_GE
;
22865 case LT
: return ARM_GT
;
22866 case GEU
: return ARM_LS
;
22867 case GTU
: return ARM_CC
;
22868 case LEU
: return ARM_CS
;
22869 case LTU
: return ARM_HI
;
22870 default: return ARM_NV
;
22876 case LTU
: return ARM_CS
;
22877 case GEU
: return ARM_CC
;
22878 case NE
: return ARM_CS
;
22879 case EQ
: return ARM_CC
;
22880 default: return ARM_NV
;
22886 case NE
: return ARM_NE
;
22887 case EQ
: return ARM_EQ
;
22888 case GEU
: return ARM_CS
;
22889 case GTU
: return ARM_HI
;
22890 case LEU
: return ARM_LS
;
22891 case LTU
: return ARM_CC
;
22892 default: return ARM_NV
;
22898 case GE
: return ARM_GE
;
22899 case LT
: return ARM_LT
;
22900 case GEU
: return ARM_CS
;
22901 case LTU
: return ARM_CC
;
22902 default: return ARM_NV
;
22908 case NE
: return ARM_VS
;
22909 case EQ
: return ARM_VC
;
22910 default: return ARM_NV
;
22916 case NE
: return ARM_NE
;
22917 case EQ
: return ARM_EQ
;
22918 case GE
: return ARM_GE
;
22919 case GT
: return ARM_GT
;
22920 case LE
: return ARM_LE
;
22921 case LT
: return ARM_LT
;
22922 case GEU
: return ARM_CS
;
22923 case GTU
: return ARM_HI
;
22924 case LEU
: return ARM_LS
;
22925 case LTU
: return ARM_CC
;
22926 default: return ARM_NV
;
22929 default: gcc_unreachable ();
/* Like maybe_get_arm_condition_code, but never return ARM_NV.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
22942 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
   code registers when not targeting Thumb1.  The VFP condition register
22944 only exists when generating hard-float code. */
22946 arm_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
22952 *p2
= TARGET_HARD_FLOAT
? VFPCC_REGNUM
: INVALID_REGNUM
;
22956 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22959 thumb2_final_prescan_insn (rtx_insn
*insn
)
22961 rtx_insn
*first_insn
= insn
;
22962 rtx body
= PATTERN (insn
);
22964 enum arm_cond_code code
;
22969 /* max_insns_skipped in the tune was already taken into account in the
22970 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22971 just emit the IT blocks as we can. It does not make sense to split
22973 max
= MAX_INSN_PER_IT_BLOCK
;
22975 /* Remove the previous insn from the count of insns to be output. */
22976 if (arm_condexec_count
)
22977 arm_condexec_count
--;
22979 /* Nothing to do if we are already inside a conditional block. */
22980 if (arm_condexec_count
)
22983 if (GET_CODE (body
) != COND_EXEC
)
22986 /* Conditional jumps are implemented directly. */
22990 predicate
= COND_EXEC_TEST (body
);
22991 arm_current_cc
= get_arm_condition_code (predicate
);
22993 n
= get_attr_ce_count (insn
);
22994 arm_condexec_count
= 1;
22995 arm_condexec_mask
= (1 << n
) - 1;
22996 arm_condexec_masklen
= n
;
22997 /* See if subsequent instructions can be combined into the same block. */
23000 insn
= next_nonnote_insn (insn
);
23002 /* Jumping into the middle of an IT block is illegal, so a label or
23003 barrier terminates the block. */
23004 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
23007 body
= PATTERN (insn
);
23008 /* USE and CLOBBER aren't really insns, so just skip them. */
23009 if (GET_CODE (body
) == USE
23010 || GET_CODE (body
) == CLOBBER
)
23013 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23014 if (GET_CODE (body
) != COND_EXEC
)
23016 /* Maximum number of conditionally executed instructions in a block. */
23017 n
= get_attr_ce_count (insn
);
23018 if (arm_condexec_masklen
+ n
> max
)
23021 predicate
= COND_EXEC_TEST (body
);
23022 code
= get_arm_condition_code (predicate
);
23023 mask
= (1 << n
) - 1;
23024 if (arm_current_cc
== code
)
23025 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
23026 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
23029 arm_condexec_count
++;
23030 arm_condexec_masklen
+= n
;
23032 /* A jump must be the last instruction in a conditional block. */
23036 /* Restore recog_data (getting the attributes of other insns can
23037 destroy this array, but final.c assumes that it remains intact
23038 across this call). */
23039 extract_constrain_insn_cached (first_insn
);
23043 arm_final_prescan_insn (rtx_insn
*insn
)
23045 /* BODY will hold the body of INSN. */
23046 rtx body
= PATTERN (insn
);
23048 /* This will be 1 if trying to repeat the trick, and things need to be
23049 reversed if it appears to fail. */
23052 /* If we start with a return insn, we only succeed if we find another one. */
23053 int seeking_return
= 0;
23054 enum rtx_code return_code
= UNKNOWN
;
23056 /* START_INSN will hold the insn from where we start looking. This is the
23057 first insn after the following code_label if REVERSE is true. */
23058 rtx_insn
*start_insn
= insn
;
23060 /* If in state 4, check if the target branch is reached, in order to
23061 change back to state 0. */
23062 if (arm_ccfsm_state
== 4)
23064 if (insn
== arm_target_insn
)
23066 arm_target_insn
= NULL
;
23067 arm_ccfsm_state
= 0;
23072 /* If in state 3, it is possible to repeat the trick, if this insn is an
23073 unconditional branch to a label, and immediately following this branch
23074 is the previous target label which is only used once, and the label this
23075 branch jumps to is not too far off. */
23076 if (arm_ccfsm_state
== 3)
23078 if (simplejump_p (insn
))
23080 start_insn
= next_nonnote_insn (start_insn
);
23081 if (BARRIER_P (start_insn
))
23083 /* XXX Isn't this always a barrier? */
23084 start_insn
= next_nonnote_insn (start_insn
);
23086 if (LABEL_P (start_insn
)
23087 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23088 && LABEL_NUSES (start_insn
) == 1)
23093 else if (ANY_RETURN_P (body
))
23095 start_insn
= next_nonnote_insn (start_insn
);
23096 if (BARRIER_P (start_insn
))
23097 start_insn
= next_nonnote_insn (start_insn
);
23098 if (LABEL_P (start_insn
)
23099 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23100 && LABEL_NUSES (start_insn
) == 1)
23103 seeking_return
= 1;
23104 return_code
= GET_CODE (body
);
23113 gcc_assert (!arm_ccfsm_state
|| reverse
);
23114 if (!JUMP_P (insn
))
  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
23119 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23120 body
= XVECEXP (body
, 0, 0);
23123 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23124 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23127 int fail
= FALSE
, succeed
= FALSE
;
23128 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23129 int then_not_else
= TRUE
;
23130 rtx_insn
*this_insn
= start_insn
;
23133 /* Register the insn jumped to. */
23136 if (!seeking_return
)
23137 label
= XEXP (SET_SRC (body
), 0);
23139 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23140 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23141 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23143 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23144 then_not_else
= FALSE
;
23146 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23148 seeking_return
= 1;
23149 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23151 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23153 seeking_return
= 1;
23154 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23155 then_not_else
= FALSE
;
23158 gcc_unreachable ();
23160 /* See how many insns this branch skips, and what kind of insns. If all
23161 insns are okay, and the label or unconditional branch to the same
23162 label is not too far away, succeed. */
23163 for (insns_skipped
= 0;
23164 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23168 this_insn
= next_nonnote_insn (this_insn
);
23172 switch (GET_CODE (this_insn
))
23175 /* Succeed if it is the target label, otherwise fail since
23176 control falls in from somewhere else. */
23177 if (this_insn
== label
)
23179 arm_ccfsm_state
= 1;
23187 /* Succeed if the following insn is the target label.
23189 If return insns are used then the last insn in a function
23190 will be a barrier. */
23191 this_insn
= next_nonnote_insn (this_insn
);
23192 if (this_insn
&& this_insn
== label
)
23194 arm_ccfsm_state
= 1;
23202 /* The AAPCS says that conditional calls should not be
23203 used since they make interworking inefficient (the
23204 linker can't transform BL<cond> into BLX). That's
23205 only a problem if the machine has BLX. */
23212 /* Succeed if the following insn is the target label, or
23213 if the following two insns are a barrier and the
23215 this_insn
= next_nonnote_insn (this_insn
);
23216 if (this_insn
&& BARRIER_P (this_insn
))
23217 this_insn
= next_nonnote_insn (this_insn
);
23219 if (this_insn
&& this_insn
== label
23220 && insns_skipped
< max_insns_skipped
)
23222 arm_ccfsm_state
= 1;
23230 /* If this is an unconditional branch to the same label, succeed.
23231 If it is to another label, do nothing. If it is conditional,
23233 /* XXX Probably, the tests for SET and the PC are
23236 scanbody
= PATTERN (this_insn
);
23237 if (GET_CODE (scanbody
) == SET
23238 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23240 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23241 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23243 arm_ccfsm_state
= 2;
23246 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23249 /* Fail if a conditional return is undesirable (e.g. on a
23250 StrongARM), but still allow this if optimizing for size. */
23251 else if (GET_CODE (scanbody
) == return_code
23252 && !use_return_insn (TRUE
, NULL
)
23255 else if (GET_CODE (scanbody
) == return_code
)
23257 arm_ccfsm_state
= 2;
23260 else if (GET_CODE (scanbody
) == PARALLEL
)
23262 switch (get_attr_conds (this_insn
))
23272 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23277 /* Instructions using or affecting the condition codes make it
23279 scanbody
= PATTERN (this_insn
);
23280 if (!(GET_CODE (scanbody
) == SET
23281 || GET_CODE (scanbody
) == PARALLEL
)
23282 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23292 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23293 arm_target_label
= CODE_LABEL_NUMBER (label
);
23296 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23298 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23300 this_insn
= next_nonnote_insn (this_insn
);
23301 gcc_assert (!this_insn
23302 || (!BARRIER_P (this_insn
)
23303 && !LABEL_P (this_insn
)));
23307 /* Oh, dear! we ran off the end.. give up. */
23308 extract_constrain_insn_cached (insn
);
23309 arm_ccfsm_state
= 0;
23310 arm_target_insn
= NULL
;
23313 arm_target_insn
= this_insn
;
23316 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23319 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23321 if (reverse
|| then_not_else
)
23322 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23325 /* Restore recog_data (getting the attributes of other insns can
23326 destroy this array, but final.c assumes that it remains intact
23327 across this call. */
23328 extract_constrain_insn_cached (insn
);
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
		  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
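/* Worked example (illustrative values): with arm_condexec_masklen == 2,
   arm_condexec_mask == 0x1 and arm_current_cc == ARM_EQ, buff becomes "te"
   and the code above prints

	ite	eq

   so the next instruction executes if EQ and the one after it if NE.  */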
23350 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23352 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23354 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23355 return (regno
== CC_REGNUM
23356 || (TARGET_HARD_FLOAT
23357 && regno
== VFPCC_REGNUM
));
23359 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23363 /* For the Thumb we only allow values bigger than SImode in
23364 registers 0 - 6, so that there is always a second low
23365 register available to hold the upper part of the value.
     We probably ought to ensure that the register is the
23367 start of an even numbered register pair. */
23368 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23370 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
23372 if (mode
== SFmode
|| mode
== SImode
)
23373 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23375 if (mode
== DFmode
)
23376 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23378 if (mode
== HFmode
)
23379 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23381 /* VFP registers can hold HImode values. */
23382 if (mode
== HImode
)
23383 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23386 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23387 || (VALID_NEON_QREG_MODE (mode
)
23388 && NEON_REGNO_OK_FOR_QUAD (regno
))
23389 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23390 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23391 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23392 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23393 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23398 if (TARGET_REALLY_IWMMXT
)
23400 if (IS_IWMMXT_GR_REGNUM (regno
))
23401 return mode
== SImode
;
23403 if (IS_IWMMXT_REGNUM (regno
))
23404 return VALID_IWMMXT_REG_MODE (mode
);
23407 /* We allow almost any value to be stored in the general registers.
23408 Restrict doubleword quantities to even register pairs in ARM state
23409 so that we can use ldrd. Do not allow very large Neon structure
23410 opaque modes in general registers; they would use too many. */
23411 if (regno
<= LAST_ARM_REGNUM
)
23413 if (ARM_NUM_REGS (mode
) > 4)
23419 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23422 if (regno
== FRAME_POINTER_REGNUM
23423 || regno
== ARG_POINTER_REGNUM
)
23424 /* We only allow integers in the fake hard registers. */
23425 return GET_MODE_CLASS (mode
) == MODE_INT
;
23430 /* Implement MODES_TIEABLE_P. */
23433 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23435 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23438 /* We specifically want to allow elements of "structure" modes to
23439 be tieable to the structure. This more general condition allows
23440 other rarer situations too. */
23442 && (VALID_NEON_DREG_MODE (mode1
)
23443 || VALID_NEON_QREG_MODE (mode1
)
23444 || VALID_NEON_STRUCT_MODE (mode1
))
23445 && (VALID_NEON_DREG_MODE (mode2
)
23446 || VALID_NEON_QREG_MODE (mode2
)
23447 || VALID_NEON_STRUCT_MODE (mode2
)))
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
        return STACK_REG;
      if (regno == CC_REGNUM)
        return CC_REG;
      if (regno < 8)
        return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
        return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
        return VFP_LO_REGS;
      else
        return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return NO_REGS;
}
23503 /* Handle a special case when computing the offset
23504 of an argument from the frame pointer. */
23506 arm_debugger_arg_offset (int value
, rtx addr
)
23510 /* We are only interested if dbxout_parms() failed to compute the offset. */
23514 /* We can only cope with the case where the address is held in a register. */
23518 /* If we are using the frame pointer to point at the argument, then
23519 an offset of 0 is correct. */
23520 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23523 /* If we are using the stack pointer to point at the
23524 argument, then an offset of 0 is correct. */
23525 /* ??? Check this is consistent with thumb2 frame layout. */
23526 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23527 && REGNO (addr
) == SP_REGNUM
)
23530 /* Oh dear. The argument is pointed to by a register rather
23531 than being held in a register, or being stored at a known
23532 offset from the frame pointer. Since GDB only understands
23533 those two kinds of argument we must translate the address
23534 held in the register into an offset from the frame pointer.
23535 We do this by searching through the insns for the function
23536 looking to see where this register gets its value. If the
23537 register is initialized from the frame pointer plus an offset
23538 then we are in luck and we can continue, otherwise we give up.
23540 This code is exercised by producing debugging information
23541 for a function with arguments like this:
23543 double func (double a, double b, int c, double d) {return d;}
23545 Without this code the stab for parameter 'd' will be set to
23546 an offset of 0 from the frame pointer, rather than 8. */
  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */
23560 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23562 if ( NONJUMP_INSN_P (insn
)
23563 && GET_CODE (PATTERN (insn
)) == SET
23564 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23565 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23566 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23567 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23568 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23571 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23580 warning (0, "unable to compute real location of stacked parameter");
23581 value
= 8; /* XXX magic hack */
/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */

static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
        /* We can calculate either in 16-bit range and precision or
           32-bit range and precision.  Make that decision based on whether
           we have native support for the ARMv8.2-A 16-bit floating-point
           instructions or not.  */
        return (TARGET_VFP_FP16INST
                ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
                : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
        return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
        gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
        return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}
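/* Illustrative sketch, not part of the build: user code such as

     _Float16 scale (_Float16 x)
     {
       return x * (_Float16) 2.0;
     }

   is only expected to be accepted when the 16-bit format is IEEE (for
   example with this port's -mfp16-format=ieee option); with the alternative
   format _Float16 is not provided, which is the distinction the hook above
   enforces.  */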
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[i];
          operands[2 * i + 1] = src[i];
        }
    }
  else
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[count - i - 1];
          operands[2 * i + 1] = src[count - i - 1];
        }
    }
}
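/* Illustrative sketch, not part of the build: the forward/backward choice
   above is the standard rule for copying through overlapping storage.  For
   a plain array it looks like this (copy_regs is a made-up name):

     static void
     copy_regs (int *dst, const int *src, unsigned int count)
     {
       if (dst <= src)
         for (unsigned int i = 0; i < count; i++)     // copy forwards
           dst[i] = src[i];
       else
         for (unsigned int i = count; i-- > 0; )      // copy backwards
           dst[i] = src[i];
     }

   Copying in the direction that moves away from the overlap guarantees that
   no source element is overwritten before it has been read, which is what
   the REGNO comparison above arranges for register pairs.  */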
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */

static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
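/* Illustrative sketch, not part of the build: the push helpers below walk a
   register mask one set bit at a time with the "mask &= mask - 1" idiom,
   which clears the lowest set bit on every iteration:

     unsigned long mask = 0xd1;        // bits 0, 4, 6, 7: r0, r4, r6, r7
     while (mask)
       {
         int regno = ctz_hwi (mask);   // yields 0, then 4, 6, 7
         mask &= mask - 1;             // drop the bit just handled
       }

   ctz_hwi is the same helper used above; the mask value is only a worked
   example.  */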
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
        tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
        tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
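/* Reading aid, not part of the build and not exact RTL output: for
   MASK == 0x11 (r0 and r4) the PARALLEL built above has roughly the shape

     (parallel [
        (set (mem:BLK (pre_modify (reg sp) (plus (reg sp) (const_int -8))))
             (unspec:BLK [(reg:SI 0)] UNSPEC_PUSH_MULT))
        (use (reg:SI 4))])

   with a REG_FRAME_RELATED_EXPR note recording the stack adjustment and,
   when REAL_REGS is nonzero, the individual register stores for the
   unwinder.  */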
/* Emit code to push or pop registers to or from the stack.  F is the
   assembly file.  MASK is the registers to pop.  */
static void
thumb_pop (FILE *f, unsigned long mask)
{
  int regno;
  int lo_mask = mask & 0xFF;

  gcc_assert (mask);

  if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
    {
      /* Special case.  Do not generate a POP PC statement here, do it in
         thumb_exit: */
      thumb_exit (f, -1);
      return;
    }

  fprintf (f, "\tpop\t{");

  /* Look at the low registers first.  */
  for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
    {
      if (lo_mask & 1)
        {
          asm_fprintf (f, "%r", regno);

          if ((lo_mask & ~1) != 0)
            fprintf (f, ", ");
        }
    }

  if (mask & (1 << PC_REGNUM))
    {
      /* Catch popping the PC.  */
      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
          || IS_CMSE_ENTRY (arm_current_func_type ()))
        {
          /* The PC is never popped directly, instead
             it is popped into r3 and then BX is used.  */
          fprintf (f, "}\n");

          thumb_exit (f, -1);

          return;
        }
      else
        {
          if (mask & 0xFF)
            fprintf (f, ", ");

          asm_fprintf (f, "%r", PC_REGNUM);
        }
    }

  fprintf (f, "}\n");
}
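/* Worked example, not part of the build: for MASK == 0x91 (r0, r4 and r7)
   the loop above prints

     pop	{r0, r4, r7}

   and "pc" is only appended when popping the PC directly is safe; with
   interworking, backtrace frames, eh_return or a CMSE entry function the PC
   is instead popped into a work register and returned through thumb_exit,
   as the code above shows.  */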
23890 /* Generate code to return from a thumb function.
23891 If 'reg_containing_return_addr' is -1, then the return address is
23892 actually on the stack, at the stack pointer. */
23894 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23896 unsigned regs_available_for_popping
;
23897 unsigned regs_to_pop
;
23899 unsigned available
;
23903 int restore_a4
= FALSE
;
23905 /* Compute the registers we need to pop. */
23909 if (reg_containing_return_addr
== -1)
23911 regs_to_pop
|= 1 << LR_REGNUM
;
23915 if (TARGET_BACKTRACE
)
23917 /* Restore the (ARM) frame pointer and stack pointer. */
23918 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23922 /* If there is nothing to pop then just emit the BX instruction and
23924 if (pops_needed
== 0)
23926 if (crtl
->calls_eh_return
)
23927 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23929 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23931 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
23932 reg_containing_return_addr
);
23933 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
23936 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23939 /* Otherwise if we are not supporting interworking and we have not created
23940 a backtrace structure and the function was not entered in ARM mode then
23941 just pop the return address straight into the PC. */
23942 else if (!TARGET_INTERWORK
23943 && !TARGET_BACKTRACE
23944 && !is_called_in_ARM_mode (current_function_decl
)
23945 && !crtl
->calls_eh_return
23946 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23948 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23952 /* Find out how many of the (return) argument registers we can corrupt. */
23953 regs_available_for_popping
= 0;
23955 /* If returning via __builtin_eh_return, the bottom three registers
23956 all contain information needed for the return. */
23957 if (crtl
->calls_eh_return
)
      /* If we can, deduce the registers used from the function's
         return value.  This is more reliable than examining
         df_regs_ever_live_p () because that will be set if the register is
         ever used in the function, not just if the register is used
         to hold a return value.  */
23967 if (crtl
->return_rtx
!= 0)
23968 mode
= GET_MODE (crtl
->return_rtx
);
23970 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23972 size
= GET_MODE_SIZE (mode
);
23976 /* In a void function we can use any argument register.
23977 In a function that returns a structure on the stack
23978 we can use the second and third argument registers. */
23979 if (mode
== VOIDmode
)
23980 regs_available_for_popping
=
23981 (1 << ARG_REGISTER (1))
23982 | (1 << ARG_REGISTER (2))
23983 | (1 << ARG_REGISTER (3));
23985 regs_available_for_popping
=
23986 (1 << ARG_REGISTER (2))
23987 | (1 << ARG_REGISTER (3));
23989 else if (size
<= 4)
23990 regs_available_for_popping
=
23991 (1 << ARG_REGISTER (2))
23992 | (1 << ARG_REGISTER (3));
23993 else if (size
<= 8)
23994 regs_available_for_popping
=
23995 (1 << ARG_REGISTER (3));
23998 /* Match registers to be popped with registers into which we pop them. */
23999 for (available
= regs_available_for_popping
,
24000 required
= regs_to_pop
;
24001 required
!= 0 && available
!= 0;
24002 available
&= ~(available
& - available
),
24003 required
&= ~(required
& - required
))
24006 /* If we have any popping registers left over, remove them. */
24008 regs_available_for_popping
&= ~available
;
24010 /* Otherwise if we need another popping register we can use
24011 the fourth argument register. */
24012 else if (pops_needed
)
24014 /* If we have not found any free argument registers and
24015 reg a4 contains the return address, we must move it. */
24016 if (regs_available_for_popping
== 0
24017 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
24019 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24020 reg_containing_return_addr
= LR_REGNUM
;
24022 else if (size
> 12)
24024 /* Register a4 is being used to hold part of the return value,
24025 but we have dire need of a free, low register. */
24028 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
24031 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
24033 /* The fourth argument register is available. */
24034 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
24040 /* Pop as many registers as we can. */
24041 thumb_pop (f
, regs_available_for_popping
);
24043 /* Process the registers we popped. */
24044 if (reg_containing_return_addr
== -1)
24046 /* The return address was popped into the lowest numbered register. */
24047 regs_to_pop
&= ~(1 << LR_REGNUM
);
24049 reg_containing_return_addr
=
24050 number_of_first_bit_set (regs_available_for_popping
);
      /* Remove this register from the mask of available registers, so that
         the return address will not be corrupted by further pops.  */
24054 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
24057 /* If we popped other registers then handle them here. */
24058 if (regs_available_for_popping
)
24062 /* Work out which register currently contains the frame pointer. */
24063 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24065 /* Move it into the correct place. */
24066 asm_fprintf (f
, "\tmov\t%r, %r\n",
24067 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
24069 /* (Temporarily) remove it from the mask of popped registers. */
24070 regs_available_for_popping
&= ~(1 << frame_pointer
);
24071 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24073 if (regs_available_for_popping
)
24077 /* We popped the stack pointer as well,
24078 find the register that contains it. */
24079 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24081 /* Move it into the stack register. */
24082 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24084 /* At this point we have popped all necessary registers, so
24085 do not worry about restoring regs_available_for_popping
24086 to its correct value:
24088 assert (pops_needed == 0)
24089 assert (regs_available_for_popping == (1 << frame_pointer))
24090 assert (regs_to_pop == (1 << STACK_POINTER)) */
          /* Since we have just moved the popped value into the frame
             pointer, the popping register is available for reuse, and
             we know that we still have the stack pointer left to pop.  */
24097 regs_available_for_popping
|= (1 << frame_pointer
);
24101 /* If we still have registers left on the stack, but we no longer have
24102 any registers into which we can pop them, then we must move the return
24103 address into the link register and make available the register that
24105 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24107 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24109 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24110 reg_containing_return_addr
);
24112 reg_containing_return_addr
= LR_REGNUM
;
24115 /* If we have registers left on the stack then pop some more.
24116 We know that at most we will want to pop FP and SP. */
24117 if (pops_needed
> 0)
24122 thumb_pop (f
, regs_available_for_popping
);
24124 /* We have popped either FP or SP.
24125 Move whichever one it is into the correct register. */
24126 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24127 move_to
= number_of_first_bit_set (regs_to_pop
);
24129 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24133 /* If we still have not popped everything then we must have only
24134 had one register available to us and we are now popping the SP. */
24135 if (pops_needed
> 0)
24139 thumb_pop (f
, regs_available_for_popping
);
24141 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24143 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24145 assert (regs_to_pop == (1 << STACK_POINTER))
24146 assert (pops_needed == 1)
24150 /* If necessary restore the a4 register. */
24153 if (reg_containing_return_addr
!= LR_REGNUM
)
24155 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24156 reg_containing_return_addr
= LR_REGNUM
;
24159 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24162 if (crtl
->calls_eh_return
)
24163 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24165 /* Return to caller. */
24166 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24168 /* This is for the cases where LR is not being used to contain the return
24169 address. It may therefore contain information that we might not want
24170 to leak, hence it must be cleared. The value in R0 will never be a
24171 secret at this point, so it is safe to use it, see the clearing code
24172 in 'cmse_nonsecure_entry_clear_before_return'. */
24173 if (reg_containing_return_addr
!= LR_REGNUM
)
24174 asm_fprintf (f
, "\tmov\tlr, r0\n");
24176 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
24177 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24180 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
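/* Illustrative sketch, not part of the build: the register-matching loop
   near the top of thumb_exit retires one register per step using the
   "x & -x" trick to isolate the lowest set bit:

     unsigned required = 0x400c;                 // r2, r3 and lr (bit 14)
     unsigned lowest   = required & -required;   // 0x4, i.e. r2
     required &= ~lowest;                        // r3 and lr remain

   Each iteration pairs one register still to be popped with one register
   that is free to receive it, until either set runs out.  */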
24183 /* Scan INSN just before assembler is output for it.
24184 For Thumb-1, we track the status of the condition codes; this
24185 information is used in the cbranchsi4_insn pattern. */
24187 thumb1_final_prescan_insn (rtx_insn
*insn
)
24189 if (flag_print_asm_name
)
24190 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24191 INSN_ADDRESSES (INSN_UID (insn
)));
24192 /* Don't overwrite the previous setter when we get to a cbranch. */
24193 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24195 enum attr_conds conds
;
24197 if (cfun
->machine
->thumb1_cc_insn
)
24199 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24200 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24203 conds
= get_attr_conds (insn
);
24204 if (conds
== CONDS_SET
)
24206 rtx set
= single_set (insn
);
24207 cfun
->machine
->thumb1_cc_insn
= insn
;
24208 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24209 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24210 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24211 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24213 rtx src1
= XEXP (SET_SRC (set
), 1);
24214 if (src1
== const0_rtx
)
24215 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24217 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24219 /* Record the src register operand instead of dest because
24220 cprop_hardreg pass propagates src. */
24221 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24224 else if (conds
!= CONDS_NOCOND
)
24225 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24228 /* Check if unexpected far jump is used. */
24229 if (cfun
->machine
->lr_save_eliminated
24230 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24231 internal_error("Unexpected thumb1 far jump");
static int
thumb_shiftable_const (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;

  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0) /* XXX */
    return 0;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      return 1;

  return 0;
}
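/* Worked example, not part of the build: 0x00ff0000 is 0xff << 16, so the
   loop above matches at i == 16 and the constant can be built as

     movs	r3, #255
     lsls	r3, r3, #16

   whereas 0x00ff00ff is never equal to 0xff shifted by a single amount, so
   the function returns 0 for it.  */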
24251 /* Returns nonzero if the current function contains,
24252 or might contain a far jump. */
24254 thumb_far_jump_used_p (void)
24257 bool far_jump
= false;
24258 unsigned int func_size
= 0;
24260 /* If we have already decided that far jumps may be used,
24261 do not bother checking again, and always return true even if
24262 it turns out that they are not being used. Once we have made
24263 the decision that far jumps are present (and that hence the link
24264 register will be pushed onto the stack) we cannot go back on it. */
24265 if (cfun
->machine
->far_jump_used
)
24268 /* If this function is not being called from the prologue/epilogue
24269 generation code then it must be being called from the
24270 INITIAL_ELIMINATION_OFFSET macro. */
24271 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24273 /* In this case we know that we are being asked about the elimination
24274 of the arg pointer register. If that register is not being used,
24275 then there are no arguments on the stack, and we do not have to
24276 worry that a far jump might force the prologue to push the link
24277 register, changing the stack offsets. In this case we can just
24278 return false, since the presence of far jumps in the function will
24279 not affect stack offsets.
24281 If the arg pointer is live (or if it was live, but has now been
24282 eliminated and so set to dead) then we do have to test to see if
24283 the function might contain a far jump. This test can lead to some
         false negatives, since before reload is completed the length of
         branch instructions is not known, so gcc defaults to returning their
         longest length, which in turn sets the far jump attribute to true.
24288 A false negative will not result in bad code being generated, but it
24289 will result in a needless push and pop of the link register. We
24290 hope that this does not occur too often.
24292 If we need doubleword stack alignment this could affect the other
24293 elimination offsets so we can't risk getting it wrong. */
24294 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24295 cfun
->machine
->arg_pointer_live
= 1;
24296 else if (!cfun
->machine
->arg_pointer_live
)
24300 /* We should not change far_jump_used during or after reload, as there is
24301 no chance to change stack frame layout. */
24302 if (reload_in_progress
|| reload_completed
)
24305 /* Check to see if the function contains a branch
24306 insn with the far jump attribute set. */
24307 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24309 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24313 func_size
+= get_attr_length (insn
);
  /* Attribute far_jump will always be true for thumb1 before
     shorten_branch pass.  So checking the far_jump attribute before
     shorten_branch isn't very useful.

     The following heuristic tries to estimate more accurately if a far jump
     may finally be used.  The heuristic is very conservative as there is
     no chance to roll back the decision not to use a far jump.

     Thumb1 long branch offset is -2048 to 2046.  The worst case is each
     2-byte insn is associated with a 4 byte constant pool.  Using
     function size 2048/3 as the threshold is conservative enough.  */
24329 if ((func_size
* 3) >= 2048)
24331 /* Record the fact that we have decided that
24332 the function does use far jumps. */
24333 cfun
->machine
->far_jump_used
= 1;
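/* Worked arithmetic for the heuristic above, not part of the build:
   (func_size * 3) >= 2048 is the same as func_size >= 683 bytes (rounding
   up).  In the worst case every 2-byte insn drags a 4-byte literal-pool
   entry with it, so 683 bytes of instructions can expand to roughly
   3 * 683 = 2049 bytes of code and data, just beyond the 2046-byte forward
   reach of the Thumb-1 branch quoted above.  */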
/* Return nonzero if FUNC must be entered in ARM mode.  */
static bool
is_called_in_ARM_mode (tree func)
{
  gcc_assert (TREE_CODE (func) == FUNCTION_DECL);

  /* Ignore the problem about functions whose address is taken.  */
  if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
    return true;

#ifdef ARM_DECLARE_FUNCTION_SIZE
  return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
#else
  return false;
#endif
}
24358 /* Given the stack offsets and register mask in OFFSETS, decide how
24359 many additional registers to push instead of subtracting a constant
24360 from SP. For epilogues the principle is the same except we use pop.
24361 FOR_PROLOGUE indicates which we're generating. */
24363 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24365 HOST_WIDE_INT amount
;
24366 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24367 /* Extract a mask of the ones we can give to the Thumb's push/pop
24369 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24370 /* Then count how many other high registers will need to be pushed. */
24371 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24372 int n_free
, reg_base
, size
;
24374 if (!for_prologue
&& frame_pointer_needed
)
24375 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24377 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24379 /* If the stack frame size is 512 exactly, we can save one load
24380 instruction, which should make this a win even when optimizing
24382 if (!optimize_size
&& amount
!= 512)
24385 /* Can't do this if there are high registers to push. */
24386 if (high_regs_pushed
!= 0)
24389 /* Shouldn't do it in the prologue if no registers would normally
24390 be pushed at all. In the epilogue, also allow it if we'll have
24391 a pop insn for the PC. */
24394 || TARGET_BACKTRACE
24395 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24396 || TARGET_INTERWORK
24397 || crtl
->args
.pretend_args_size
!= 0))
24400 /* Don't do this if thumb_expand_prologue wants to emit instructions
24401 between the push and the stack frame allocation. */
24403 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24404 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24411 size
= arm_size_return_regs ();
24412 reg_base
= ARM_NUM_INTS (size
);
24413 live_regs_mask
>>= reg_base
;
24416 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24417 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24419 live_regs_mask
>>= 1;
24425 gcc_assert (amount
/ 4 * 4 == amount
);
24427 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24428 return (amount
- 508) / 4;
24429 if (amount
<= n_free
* 4)
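/* Worked example, not part of the build: with amount == 516 and four free
   low registers, the test above accepts the case (516 - 4 * 4 < 512) and
   returns (516 - 508) / 4 == 2 extra pushes, bringing the remaining
   explicit adjustment back within the 508-byte range of a single Thumb-1
   "sub sp, #imm".  */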
24434 /* The bits which aren't usefully expanded as rtl. */
24436 thumb1_unexpanded_epilogue (void)
24438 arm_stack_offsets
*offsets
;
24440 unsigned long live_regs_mask
= 0;
24441 int high_regs_pushed
= 0;
24443 int had_to_push_lr
;
24446 if (cfun
->machine
->return_used_this_function
!= 0)
24449 if (IS_NAKED (arm_current_func_type ()))
24452 offsets
= arm_get_frame_offsets ();
24453 live_regs_mask
= offsets
->saved_regs_mask
;
24454 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
  /* If we can, deduce the registers used from the function's return value.
     This is more reliable than examining df_regs_ever_live_p () because that
     will be set if the register is ever used in the function, not just if
     the register is used to hold a return value.  */
24460 size
= arm_size_return_regs ();
24462 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24465 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24466 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24469 /* The prolog may have pushed some high registers to use as
24470 work registers. e.g. the testsuite file:
24471 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24472 compiles to produce:
24473 push {r4, r5, r6, r7, lr}
24477 as part of the prolog. We have to undo that pushing here. */
24479 if (high_regs_pushed
)
24481 unsigned long mask
= live_regs_mask
& 0xff;
24484 /* The available low registers depend on the size of the value we are
      if (mask == 0)
        /* Oh dear!  We have no low registers into which we can pop
           high registers!  */
        internal_error
          ("no low registers available for popping high registers");
24497 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24498 if (live_regs_mask
& (1 << next_hi_reg
))
24501 while (high_regs_pushed
)
24503 /* Find lo register(s) into which the high register(s) can
24505 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24507 if (mask
& (1 << regno
))
24508 high_regs_pushed
--;
24509 if (high_regs_pushed
== 0)
24513 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24515 /* Pop the values into the low register(s). */
24516 thumb_pop (asm_out_file
, mask
);
24518 /* Move the value(s) into the high registers. */
24519 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24521 if (mask
& (1 << regno
))
24523 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24526 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24527 if (live_regs_mask
& (1 << next_hi_reg
))
24532 live_regs_mask
&= ~0x0f00;
24535 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24536 live_regs_mask
&= 0xff;
24538 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24540 /* Pop the return address into the PC. */
24541 if (had_to_push_lr
)
24542 live_regs_mask
|= 1 << PC_REGNUM
;
24544 /* Either no argument registers were pushed or a backtrace
24545 structure was created which includes an adjusted stack
24546 pointer, so just pop everything. */
24547 if (live_regs_mask
)
24548 thumb_pop (asm_out_file
, live_regs_mask
);
      /* We have either just popped the return address into the
         PC or it was kept in LR for the entire function.
         Note that thumb_pop has already called thumb_exit if the
         PC was in the list.  */
24554 if (!had_to_push_lr
)
24555 thumb_exit (asm_out_file
, LR_REGNUM
);
24559 /* Pop everything but the return address. */
24560 if (live_regs_mask
)
24561 thumb_pop (asm_out_file
, live_regs_mask
);
24563 if (had_to_push_lr
)
24567 /* We have no free low regs, so save one. */
24568 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24572 /* Get the return address into a temporary register. */
24573 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24577 /* Move the return address to lr. */
24578 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24580 /* Restore the low register. */
24581 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24586 regno
= LAST_ARG_REGNUM
;
24591 /* Remove the argument registers that were pushed onto the stack. */
24592 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24593 SP_REGNUM
, SP_REGNUM
,
24594 crtl
->args
.pretend_args_size
);
24596 thumb_exit (asm_out_file
, regno
);
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Check that FUNC is called with a different mode.  */

bool
arm_change_mode_p (tree func)
{
  if (TREE_CODE (func) != FUNCTION_DECL)
    return false;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  int flags = callee_opts->x_target_flags;

  return (TARGET_THUMB_P (flags) != TARGET_THUMB);
}
/* Like arm_compute_initial_elimination offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */

HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

        default:
          gcc_unreachable ();
        }
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }
}
24715 /* Generate the function's prologue. */
24718 thumb1_expand_prologue (void)
24722 HOST_WIDE_INT amount
;
24723 HOST_WIDE_INT size
;
24724 arm_stack_offsets
*offsets
;
24725 unsigned long func_type
;
24727 unsigned long live_regs_mask
;
24728 unsigned long l_mask
;
24729 unsigned high_regs_pushed
= 0;
24730 bool lr_needs_saving
;
24732 func_type
= arm_current_func_type ();
24734 /* Naked functions don't have prologues. */
24735 if (IS_NAKED (func_type
))
24737 if (flag_stack_usage_info
)
24738 current_function_static_stack_size
= 0;
24742 if (IS_INTERRUPT (func_type
))
24744 error ("interrupt Service Routines cannot be coded in Thumb mode");
24748 if (is_called_in_ARM_mode (current_function_decl
))
24749 emit_insn (gen_prologue_thumb1_interwork ());
24751 offsets
= arm_get_frame_offsets ();
24752 live_regs_mask
= offsets
->saved_regs_mask
;
24753 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
24755 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24756 l_mask
= live_regs_mask
& 0x40ff;
24757 /* Then count how many other high registers will need to be pushed. */
24758 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24760 if (crtl
->args
.pretend_args_size
)
24762 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24764 if (cfun
->machine
->uses_anonymous_args
)
24766 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24767 unsigned long mask
;
24769 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24770 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24772 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24776 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24777 stack_pointer_rtx
, x
));
24779 RTX_FRAME_RELATED_P (insn
) = 1;
24782 if (TARGET_BACKTRACE
)
24784 HOST_WIDE_INT offset
= 0;
24785 unsigned work_register
;
24786 rtx work_reg
, x
, arm_hfp_rtx
;
24788 /* We have been asked to create a stack backtrace structure.
24789 The code looks like this:
24793 0 sub SP, #16 Reserve space for 4 registers.
24794 2 push {R7} Push low registers.
24795 4 add R7, SP, #20 Get the stack pointer before the push.
24796 6 str R7, [SP, #8] Store the stack pointer
24797 (before reserving the space).
24798 8 mov R7, PC Get hold of the start of this code + 12.
24799 10 str R7, [SP, #16] Store it.
24800 12 mov R7, FP Get hold of the current frame pointer.
24801 14 str R7, [SP, #4] Store it.
24802 16 mov R7, LR Get hold of the current return address.
24803 18 str R7, [SP, #12] Store it.
24804 20 add R7, SP, #16 Point at the start of the
24805 backtrace structure.
24806 22 mov FP, R7 Put this value into the frame pointer. */
24808 work_register
= thumb_find_work_register (live_regs_mask
);
24809 work_reg
= gen_rtx_REG (SImode
, work_register
);
24810 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24812 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24813 stack_pointer_rtx
, GEN_INT (-16)));
24814 RTX_FRAME_RELATED_P (insn
) = 1;
24818 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24819 RTX_FRAME_RELATED_P (insn
) = 1;
24820 lr_needs_saving
= false;
24822 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24825 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24826 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24828 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24829 x
= gen_frame_mem (SImode
, x
);
24830 emit_move_insn (x
, work_reg
);
24832 /* Make sure that the instruction fetching the PC is in the right place
24833 to calculate "start of backtrace creation code + 12". */
24834 /* ??? The stores using the common WORK_REG ought to be enough to
24835 prevent the scheduler from doing anything weird. Failing that
24836 we could always move all of the following into an UNSPEC_VOLATILE. */
24839 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24840 emit_move_insn (work_reg
, x
);
24842 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24843 x
= gen_frame_mem (SImode
, x
);
24844 emit_move_insn (x
, work_reg
);
24846 emit_move_insn (work_reg
, arm_hfp_rtx
);
24848 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24849 x
= gen_frame_mem (SImode
, x
);
24850 emit_move_insn (x
, work_reg
);
24854 emit_move_insn (work_reg
, arm_hfp_rtx
);
24856 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24857 x
= gen_frame_mem (SImode
, x
);
24858 emit_move_insn (x
, work_reg
);
24860 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24861 emit_move_insn (work_reg
, x
);
24863 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24864 x
= gen_frame_mem (SImode
, x
);
24865 emit_move_insn (x
, work_reg
);
24868 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24869 emit_move_insn (work_reg
, x
);
24871 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24872 x
= gen_frame_mem (SImode
, x
);
24873 emit_move_insn (x
, work_reg
);
24875 x
= GEN_INT (offset
+ 12);
24876 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24878 emit_move_insn (arm_hfp_rtx
, work_reg
);
24880 /* Optimization: If we are not pushing any low registers but we are going
24881 to push some high registers then delay our first push. This will just
24882 be a push of LR and we can combine it with the push of the first high
24884 else if ((l_mask
& 0xff) != 0
24885 || (high_regs_pushed
== 0 && lr_needs_saving
))
24887 unsigned long mask
= l_mask
;
24888 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24889 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24890 RTX_FRAME_RELATED_P (insn
) = 1;
24891 lr_needs_saving
= false;
24894 if (high_regs_pushed
)
24896 unsigned pushable_regs
;
24897 unsigned next_hi_reg
;
24898 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24899 : crtl
->args
.info
.nregs
;
24900 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24902 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24903 if (live_regs_mask
& (1 << next_hi_reg
))
      /* Here we need to mask out registers used for passing arguments
         even if they can be pushed.  This is to avoid using them to stash
         the high registers.  Such a stash may clobber the arguments.  */
24909 pushable_regs
= l_mask
& (~arg_regs_mask
);
24910 if (lr_needs_saving
)
24911 pushable_regs
&= ~(1 << LR_REGNUM
);
24913 if (pushable_regs
== 0)
24914 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24916 while (high_regs_pushed
> 0)
24918 unsigned long real_regs_mask
= 0;
24919 unsigned long push_mask
= 0;
24921 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
24923 if (pushable_regs
& (1 << regno
))
24925 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24926 gen_rtx_REG (SImode
, next_hi_reg
));
24928 high_regs_pushed
--;
24929 real_regs_mask
|= (1 << next_hi_reg
);
24930 push_mask
|= (1 << regno
);
24932 if (high_regs_pushed
)
24934 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24936 if (live_regs_mask
& (1 << next_hi_reg
))
24944 /* If we had to find a work register and we have not yet
24945 saved the LR then add it to the list of regs to push. */
24946 if (lr_needs_saving
)
24948 push_mask
|= 1 << LR_REGNUM
;
24949 real_regs_mask
|= 1 << LR_REGNUM
;
24950 lr_needs_saving
= false;
24953 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
24954 RTX_FRAME_RELATED_P (insn
) = 1;
24958 /* Load the pic register before setting the frame pointer,
24959 so we can use r7 as a temporary work register. */
24960 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24961 arm_load_pic_register (live_regs_mask
);
24963 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24964 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24965 stack_pointer_rtx
);
24967 size
= offsets
->outgoing_args
- offsets
->saved_args
;
24968 if (flag_stack_usage_info
)
24969 current_function_static_stack_size
= size
;
24971 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24972 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
24973 sorry ("-fstack-check=specific for Thumb-1");
24975 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24976 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
24981 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24982 GEN_INT (- amount
)));
24983 RTX_FRAME_RELATED_P (insn
) = 1;
24989 /* The stack decrement is too big for an immediate value in a single
24990 insn. In theory we could issue multiple subtracts, but after
24991 three of them it becomes more space efficient to place the full
24992 value in the constant pool and load into a register. (Also the
24993 ARM debugger really likes to see only one stack decrement per
24994 function). So instead we look for a scratch register into which
24995 we can load the decrement, and then we subtract this from the
24996 stack pointer. Unfortunately on the thumb the only available
24997 scratch registers are the argument registers, and we cannot use
24998 these as they may hold arguments to the function. Instead we
24999 attempt to locate a call preserved register which is used by this
25000 function. If we can find one, then we know that it will have
25001 been pushed at the start of the prologue and so we can corrupt
25003 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
25004 if (live_regs_mask
& (1 << regno
))
25007 gcc_assert(regno
<= LAST_LO_REGNUM
);
25009 reg
= gen_rtx_REG (SImode
, regno
);
25011 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
25013 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25014 stack_pointer_rtx
, reg
));
25016 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
25017 plus_constant (Pmode
, stack_pointer_rtx
,
25019 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
25020 RTX_FRAME_RELATED_P (insn
) = 1;
25024 if (frame_pointer_needed
)
25025 thumb_set_frame_pointer (offsets
);
25027 /* If we are profiling, make sure no instructions are scheduled before
25028 the call to mcount. Similarly if the user has requested no
25029 scheduling in the prolog. Similarly if we want non-call exceptions
25030 using the EABI unwinder, to prevent faulting instructions from being
25031 swapped with a stack adjustment. */
25032 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
25033 || (arm_except_unwind_info (&global_options
) == UI_TARGET
25034 && cfun
->can_throw_non_call_exceptions
))
25035 emit_insn (gen_blockage ());
25037 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
25038 if (live_regs_mask
& 0xff)
25039 cfun
->machine
->lr_save_eliminated
= 0;
25042 /* Clear caller saved registers not used to pass return values and leaked
25043 condition flags before exiting a cmse_nonsecure_entry function. */
25046 cmse_nonsecure_entry_clear_before_return (void)
25048 uint64_t to_clear_mask
[2];
25049 uint32_t padding_bits_to_clear
= 0;
25050 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
;
25051 int regno
, maxregno
= IP_REGNUM
;
25055 to_clear_mask
[0] = (1ULL << (NUM_ARG_REGS
)) - 1;
25056 to_clear_mask
[0] |= (1ULL << IP_REGNUM
);
25058 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25059 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25060 to make sure the instructions used to clear them are present. */
25061 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
25063 uint64_t float_mask
= (1ULL << (D7_VFP_REGNUM
+ 1)) - 1;
25064 maxregno
= LAST_VFP_REGNUM
;
25066 float_mask
&= ~((1ULL << FIRST_VFP_REGNUM
) - 1);
25067 to_clear_mask
[0] |= float_mask
;
25069 float_mask
= (1ULL << (maxregno
- 63)) - 1;
25070 to_clear_mask
[1] = float_mask
;
25072 /* Make sure we don't clear the two scratch registers used to clear the
25073 relevant FPSCR bits in output_return_instruction. */
25074 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
25075 to_clear_mask
[0] &= ~(1ULL << IP_REGNUM
);
25076 emit_use (gen_rtx_REG (SImode
, 4));
25077 to_clear_mask
[0] &= ~(1ULL << 4);
25080 /* If the user has defined registers to be caller saved, these are no longer
25081 restored by the function before returning and must thus be cleared for
25082 security purposes. */
25083 for (regno
= NUM_ARG_REGS
; regno
< LAST_VFP_REGNUM
; regno
++)
25085 /* We do not touch registers that can be used to pass arguments as per
25086 the AAPCS, since these should never be made callee-saved by user
25088 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
25090 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
25092 if (call_used_regs
[regno
])
25093 to_clear_mask
[regno
/ 64] |= (1ULL << (regno
% 64));
25096 /* Make sure we do not clear the registers used to return the result in. */
25097 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
25098 if (!VOID_TYPE_P (result_type
))
25100 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
25102 /* No need to check that we return in registers, because we don't
25103 support returning on stack yet. */
25105 &= ~compute_not_to_clear_mask (result_type
, result_rtl
, 0,
25106 padding_bits_to_clear_ptr
);
25109 if (padding_bits_to_clear
!= 0)
      /* padding_bits_to_clear is not 0, so we know we are dealing with
         returning a composite type, which only uses r0.  Let's make sure that
         r1-r3 are cleared too; we will use r1 as a scratch register.  */
25115 gcc_assert ((to_clear_mask
[0] & 0xe) == 0xe);
25117 reg_rtx
= gen_rtx_REG (SImode
, R1_REGNUM
);
25119 /* Fill the lower half of the negated padding_bits_to_clear. */
25120 emit_move_insn (reg_rtx
,
25121 GEN_INT ((((~padding_bits_to_clear
) << 16u) >> 16u)));
25123 /* Also fill the top half of the negated padding_bits_to_clear. */
25124 if (((~padding_bits_to_clear
) >> 16) > 0)
25125 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg_rtx
,
25128 GEN_INT ((~padding_bits_to_clear
) >> 16)));
25130 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, R0_REGNUM
),
25131 gen_rtx_REG (SImode
, R0_REGNUM
),
25135 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
25137 if (!(to_clear_mask
[regno
/ 64] & (1ULL << (regno
% 64))))
25140 if (IS_VFP_REGNUM (regno
))
25142 /* If regno is an even vfp register and its successor is also to
25143 be cleared, use vmov. */
25144 if (TARGET_VFP_DOUBLE
25145 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
25146 && to_clear_mask
[regno
/ 64] & (1ULL << ((regno
% 64) + 1)))
25148 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
25149 CONST1_RTX (DFmode
));
25150 emit_use (gen_rtx_REG (DFmode
, regno
));
25155 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
25156 CONST1_RTX (SFmode
));
25157 emit_use (gen_rtx_REG (SFmode
, regno
));
25164 if (regno
== R0_REGNUM
)
25165 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25168 /* R0 has either been cleared before, see code above, or it
25169 holds a return value, either way it is not secret
25171 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25172 gen_rtx_REG (SImode
, R0_REGNUM
));
25173 emit_use (gen_rtx_REG (SImode
, regno
));
25177 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25178 gen_rtx_REG (SImode
, LR_REGNUM
));
25179 emit_use (gen_rtx_REG (SImode
, regno
));
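/* Illustrative sketch, not part of the build: to_clear_mask is a small
   128-bit bitmap indexed by hard register number, so a membership test
   follows the usual word/bit split (bit_set_p is a made-up name):

     static int
     bit_set_p (const uint64_t mask[2], unsigned int regno)
     {
       return (mask[regno / 64] >> (regno % 64)) & 1;
     }

   Word 0 covers the core registers and the low VFP registers, word 1 the
   remaining VFP registers, matching the regno / 64 and regno % 64 indexing
   used above.  */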
25185 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25186 POP instruction can be generated. LR should be replaced by PC. All
25187 the checks required are already done by USE_RETURN_INSN (). Hence,
25188 all we really need to check here is if single register is to be
25189 returned, or multiple register return. */
25191 thumb2_expand_return (bool simple_return
)
25194 unsigned long saved_regs_mask
;
25195 arm_stack_offsets
*offsets
;
25197 offsets
= arm_get_frame_offsets ();
25198 saved_regs_mask
= offsets
->saved_regs_mask
;
25200 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25201 if (saved_regs_mask
& (1 << i
))
25204 if (!simple_return
&& saved_regs_mask
)
25206 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25207 functions or adapt code to handle according to ACLE. This path should
25208 not be reachable for cmse_nonsecure_entry functions though we prefer
25209 to assert it for now to ensure that future code changes do not silently
25210 change this behavior. */
25211 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25214 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25215 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25216 rtx addr
= gen_rtx_MEM (SImode
,
25217 gen_rtx_POST_INC (SImode
,
25218 stack_pointer_rtx
));
25219 set_mem_alias_set (addr
, get_frame_alias_set ());
25220 XVECEXP (par
, 0, 0) = ret_rtx
;
25221 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25222 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25223 emit_jump_insn (par
);
25227 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25228 saved_regs_mask
|= (1 << PC_REGNUM
);
25229 arm_emit_multi_reg_pop (saved_regs_mask
);
25234 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25235 cmse_nonsecure_entry_clear_before_return ();
25236 emit_jump_insn (simple_return_rtx
);
25241 thumb1_expand_epilogue (void)
25243 HOST_WIDE_INT amount
;
25244 arm_stack_offsets
*offsets
;
  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;
25251 offsets
= arm_get_frame_offsets ();
25252 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25254 if (frame_pointer_needed
)
25256 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25257 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25259 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25261 gcc_assert (amount
>= 0);
25264 emit_insn (gen_blockage ());
25267 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25268 GEN_INT (amount
)));
25271 /* r3 is always free in the epilogue. */
25272 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25274 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25275 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25279 /* Emit a USE (stack_pointer_rtx), so that
25280 the stack adjustment will not be deleted. */
25281 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25283 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25284 emit_insn (gen_blockage ());
25286 /* Emit a clobber for each insn that will be restored in the epilogue,
25287 so that flow2 will get register lifetimes correct. */
25288 for (regno
= 0; regno
< 13; regno
++)
25289 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25290 emit_clobber (gen_rtx_REG (SImode
, regno
));
25292 if (! df_regs_ever_live_p (LR_REGNUM
))
25293 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25295 /* Clear all caller-saved regs that are not used to return. */
25296 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25297 cmse_nonsecure_entry_clear_before_return ();
25300 /* Epilogue code for APCS frame. */
25302 arm_expand_epilogue_apcs_frame (bool really_return
)
25304 unsigned long func_type
;
25305 unsigned long saved_regs_mask
;
25308 int floats_from_frame
= 0;
25309 arm_stack_offsets
*offsets
;
25311 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25312 func_type
= arm_current_func_type ();
25314 /* Get frame offsets for ARM. */
25315 offsets
= arm_get_frame_offsets ();
25316 saved_regs_mask
= offsets
->saved_regs_mask
;
25318 /* Find the offset of the floating-point save area in the frame. */
25320 = (offsets
->saved_args
25321 + arm_compute_static_chain_stack_bytes ()
25324 /* Compute how many core registers saved and how far away the floats are. */
25325 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25326 if (saved_regs_mask
& (1 << i
))
25329 floats_from_frame
+= 4;
25332 if (TARGET_HARD_FLOAT
)
25335 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25337 /* The offset is from IP_REGNUM. */
25338 int saved_size
= arm_get_vfp_saved_size ();
25339 if (saved_size
> 0)
25342 floats_from_frame
+= saved_size
;
25343 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25344 hard_frame_pointer_rtx
,
25345 GEN_INT (-floats_from_frame
)));
25346 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25347 ip_rtx
, hard_frame_pointer_rtx
);
25350 /* Generate VFP register multi-pop. */
25351 start_reg
= FIRST_VFP_REGNUM
;
25353 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25354 /* Look for a case where a reg does not need restoring. */
25355 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25356 && (!df_regs_ever_live_p (i
+ 1)
25357 || call_used_regs
[i
+ 1]))
25359 if (start_reg
!= i
)
25360 arm_emit_vfp_multi_reg_pop (start_reg
,
25361 (i
- start_reg
) / 2,
25362 gen_rtx_REG (SImode
,
25367 /* Restore the remaining regs that we have discovered (or possibly
25368 even all of them, if the conditional in the for loop never
25370 if (start_reg
!= i
)
25371 arm_emit_vfp_multi_reg_pop (start_reg
,
25372 (i
- start_reg
) / 2,
25373 gen_rtx_REG (SImode
, IP_REGNUM
));
25378 /* The frame pointer is guaranteed to be non-double-word aligned, as
25379 it is set to double-word-aligned old_stack_pointer - 4. */
25381 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
25383 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25384 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25386 rtx addr
= gen_frame_mem (V2SImode
,
25387 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25389 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25390 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25391 gen_rtx_REG (V2SImode
, i
),
  /* saved_regs_mask should contain IP, which holds the old stack pointer
     at the time the activation record was created.  Since SP and IP are
     adjacent registers, we can restore the value directly into SP.  */
25400 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25401 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25402 saved_regs_mask
|= (1 << SP_REGNUM
);
25404 /* There are two registers left in saved_regs_mask - LR and PC. We
25405 only need to restore LR (the return address), but to
25406 save time we can load it directly into PC, unless we need a
25407 special function exit sequence, or we are not really returning. */
25409 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25410 && !crtl
->calls_eh_return
)
25411 /* Delete LR from the register mask, so that LR on
25412 the stack is loaded into the PC in the register mask. */
25413 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25415 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25417 num_regs
= bit_count (saved_regs_mask
);
25418 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25421 emit_insn (gen_blockage ());
25422 /* Unwind the stack to just below the saved registers. */
25423 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25424 hard_frame_pointer_rtx
,
25425 GEN_INT (- 4 * num_regs
)));
25427 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25428 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25431 arm_emit_multi_reg_pop (saved_regs_mask
);
25433 if (IS_INTERRUPT (func_type
))
25435 /* Interrupt handlers will have pushed the
25436 IP onto the stack, so restore it now. */
25438 rtx addr
= gen_rtx_MEM (SImode
,
25439 gen_rtx_POST_INC (SImode
,
25440 stack_pointer_rtx
));
25441 set_mem_alias_set (addr
, get_frame_alias_set ());
25442 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25443 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25444 gen_rtx_REG (SImode
, IP_REGNUM
),
25448 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25451 if (crtl
->calls_eh_return
)
25452 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25454 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25456 if (IS_STACKALIGN (func_type
))
25457 /* Restore the original stack pointer. Before prologue, the stack was
25458 realigned and the original stack pointer saved in r0. For details,
25459 see comment in arm_expand_prologue. */
25460 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25462 emit_jump_insn (simple_return_rtx
);
25465 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25466 function is not a sibcall. */
25468 arm_expand_epilogue (bool really_return
)
25470 unsigned long func_type
;
25471 unsigned long saved_regs_mask
;
25475 arm_stack_offsets
*offsets
;
25477 func_type
= arm_current_func_type ();
25479 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25480 let output_return_instruction take care of instruction emission if any. */
25481 if (IS_NAKED (func_type
)
25482 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25485 emit_jump_insn (simple_return_rtx
);
25489 /* If we are throwing an exception, then we really must be doing a
25490 return, so we can't tail-call. */
25491 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
25493 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25495 arm_expand_epilogue_apcs_frame (really_return
);
25499 /* Get frame offsets for ARM. */
25500 offsets
= arm_get_frame_offsets ();
25501 saved_regs_mask
= offsets
->saved_regs_mask
;
25502 num_regs
= bit_count (saved_regs_mask
);
25504 if (frame_pointer_needed
)
25507 /* Restore stack pointer if necessary. */
25510 /* In ARM mode, frame pointer points to first saved register.
25511 Restore stack pointer to last saved register. */
25512 amount
= offsets
->frame
- offsets
->saved_regs
;
25514 /* Force out any pending memory operations that reference stacked data
25515 before stack de-allocation occurs. */
25516 emit_insn (gen_blockage ());
25517 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25518 hard_frame_pointer_rtx
,
25519 GEN_INT (amount
)));
25520 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25522 hard_frame_pointer_rtx
);
25524 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25526 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25530 /* In Thumb-2 mode, the frame pointer points to the last saved
25532 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25535 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25536 hard_frame_pointer_rtx
,
25537 GEN_INT (amount
)));
25538 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25539 hard_frame_pointer_rtx
,
25540 hard_frame_pointer_rtx
);
25543 /* Force out any pending memory operations that reference stacked data
25544 before stack de-allocation occurs. */
25545 emit_insn (gen_blockage ());
25546 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25547 hard_frame_pointer_rtx
));
25548 arm_add_cfa_adjust_cfa_note (insn
, 0,
25550 hard_frame_pointer_rtx
);
25551 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25553 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25558 /* Pop off outgoing args and local frame to adjust stack pointer to
25559 last saved register. */
25560 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25564 /* Force out any pending memory operations that reference stacked data
25565 before stack de-allocation occurs. */
25566 emit_insn (gen_blockage ());
25567 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25569 GEN_INT (amount
)));
25570 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25571 stack_pointer_rtx
, stack_pointer_rtx
);
25572 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25574 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25578 if (TARGET_HARD_FLOAT
)
25580 /* Generate VFP register multi-pop. */
25581 int end_reg
= LAST_VFP_REGNUM
+ 1;
25583 /* Scan the registers in reverse order. We need to match
25584 any groupings made in the prologue and generate matching
25585 vldm operations. The need to match groups is because,
25586 unlike pop, vldm can only do consecutive regs. */
25587 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25588 /* Look for a case where a reg does not need restoring. */
25589 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25590 && (!df_regs_ever_live_p (i
+ 1)
25591 || call_used_regs
[i
+ 1]))
25593 /* Restore the regs discovered so far (from reg+2 to
25595 if (end_reg
> i
+ 2)
25596 arm_emit_vfp_multi_reg_pop (i
+ 2,
25597 (end_reg
- (i
+ 2)) / 2,
25598 stack_pointer_rtx
);
25602 /* Restore the remaining regs that we have discovered (or possibly
25603 even all of them, if the conditional in the for loop never
25605 if (end_reg
> i
+ 2)
25606 arm_emit_vfp_multi_reg_pop (i
+ 2,
25607 (end_reg
- (i
+ 2)) / 2,
25608 stack_pointer_rtx
);
25612 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25613 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25616 rtx addr
= gen_rtx_MEM (V2SImode
,
25617 gen_rtx_POST_INC (SImode
,
25618 stack_pointer_rtx
));
25619 set_mem_alias_set (addr
, get_frame_alias_set ());
25620 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25621 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25622 gen_rtx_REG (V2SImode
, i
),
25624 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25625 stack_pointer_rtx
, stack_pointer_rtx
);
25628 if (saved_regs_mask
)
25631 bool return_in_pc
= false;
25633 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25634 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25635 && !IS_CMSE_ENTRY (func_type
)
25636 && !IS_STACKALIGN (func_type
)
25638 && crtl
->args
.pretend_args_size
== 0
25639 && saved_regs_mask
& (1 << LR_REGNUM
)
25640 && !crtl
->calls_eh_return
)
25642 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25643 saved_regs_mask
|= (1 << PC_REGNUM
);
25644 return_in_pc
= true;
25647 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25649 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25650 if (saved_regs_mask
& (1 << i
))
25652 rtx addr
= gen_rtx_MEM (SImode
,
25653 gen_rtx_POST_INC (SImode
,
25654 stack_pointer_rtx
));
25655 set_mem_alias_set (addr
, get_frame_alias_set ());
25657 if (i
== PC_REGNUM
)
25659 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25660 XVECEXP (insn
, 0, 0) = ret_rtx
;
25661 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25663 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25664 insn
= emit_jump_insn (insn
);
25668 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25670 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25671 gen_rtx_REG (SImode
, i
),
25673 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25675 stack_pointer_rtx
);
25682 && current_tune
->prefer_ldrd_strd
25683 && !optimize_function_for_size_p (cfun
))
25686 thumb2_emit_ldrd_pop (saved_regs_mask
);
25687 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25688 arm_emit_ldrd_pop (saved_regs_mask
);
25690 arm_emit_multi_reg_pop (saved_regs_mask
);
25693 arm_emit_multi_reg_pop (saved_regs_mask
);
25701 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25705 rtx dwarf
= NULL_RTX
;
25707 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25709 GEN_INT (amount
)));
25711 RTX_FRAME_RELATED_P (tmp
) = 1;
25713 if (cfun
->machine
->uses_anonymous_args
)
25715 /* Restore pretend args. Refer arm_expand_prologue on how to save
25716 pretend_args in stack. */
25717 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25718 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25719 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25720 if (saved_regs_mask
& (1 << i
))
25722 rtx reg
= gen_rtx_REG (SImode
, i
);
25723 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25726 REG_NOTES (tmp
) = dwarf
;
25728 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25729 stack_pointer_rtx
, stack_pointer_rtx
);
25732 /* Clear all caller-saved regs that are not used to return. */
25733 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25735 /* CMSE_ENTRY always returns. */
25736 gcc_assert (really_return
);
25737 cmse_nonsecure_entry_clear_before_return ();
25740 if (!really_return
)
25743 if (crtl
->calls_eh_return
)
25744 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25746 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25748 if (IS_STACKALIGN (func_type
))
25749 /* Restore the original stack pointer. Before prologue, the stack was
25750 realigned and the original stack pointer saved in r0. For details,
25751 see comment in arm_expand_prologue. */
25752 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25754 emit_jump_insn (simple_return_rtx
);
25757 /* Implementation of insn prologue_thumb1_interwork. This is the first
25758 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25761 thumb1_output_interwork (void)
25764 FILE *f
= asm_out_file
;
25766 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25767 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25769 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25771 /* Generate code sequence to switch us into Thumb mode. */
25772 /* The .code 32 directive has already been emitted by
25773 ASM_DECLARE_FUNCTION_NAME. */
25774 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25775 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25777 /* Generate a label, so that the debugger will notice the
25778 change in instruction sets. This label is also used by
25779 the assembler to bypass the ARM code when this function
25780 is called from a Thumb encoded function elsewhere in the
25781 same file. Hence the definition of STUB_NAME here must
25782 agree with the definition in gas/config/tc-arm.c. */
25784 #define STUB_NAME ".real_start_of"
25786 fprintf (f
, "\t.code\t16\n");
25788 if (arm_dllexport_name_p (name
))
25789 name
= arm_strip_name_encoding (name
);
25791 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25792 fprintf (f
, "\t.thumb_func\n");
25793 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
25798 /* Handle the case of a double word load into a low register from
25799 a computed memory address. The computed address may involve a
25800 register which is overwritten by the load. */
25802 thumb_load_double_from_address (rtx
*operands
)
25810 gcc_assert (REG_P (operands
[0]));
25811 gcc_assert (MEM_P (operands
[1]));
25813 /* Get the memory address. */
25814 addr
= XEXP (operands
[1], 0);
25816 /* Work out how the memory address is computed. */
25817 switch (GET_CODE (addr
))
25820 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25822 if (REGNO (operands
[0]) == REGNO (addr
))
25824 output_asm_insn ("ldr\t%H0, %2", operands
);
25825 output_asm_insn ("ldr\t%0, %1", operands
);
25829 output_asm_insn ("ldr\t%0, %1", operands
);
25830 output_asm_insn ("ldr\t%H0, %2", operands
);
25835 /* Compute <address> + 4 for the high order load. */
25836 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25838 output_asm_insn ("ldr\t%0, %1", operands
);
25839 output_asm_insn ("ldr\t%H0, %2", operands
);
25843 arg1
= XEXP (addr
, 0);
25844 arg2
= XEXP (addr
, 1);
25846 if (CONSTANT_P (arg1
))
25847 base
= arg2
, offset
= arg1
;
25849 base
= arg1
, offset
= arg2
;
25851 gcc_assert (REG_P (base
));
25853 /* Catch the case of <address> = <reg> + <reg> */
25854 if (REG_P (offset
))
25856 int reg_offset
= REGNO (offset
);
25857 int reg_base
= REGNO (base
);
25858 int reg_dest
= REGNO (operands
[0]);
25860 /* Add the base and offset registers together into the
25861 higher destination register. */
25862 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25863 reg_dest
+ 1, reg_base
, reg_offset
);
25865 /* Load the lower destination register from the address in
25866 the higher destination register. */
25867 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25868 reg_dest
, reg_dest
+ 1);
25870 /* Load the higher destination register from its own address
25872 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25873 reg_dest
+ 1, reg_dest
+ 1);
25877 /* Compute <address> + 4 for the high order load. */
25878 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25880 /* If the computed address is held in the low order register
25881 then load the high order register first, otherwise always
25882 load the low order register first. */
25883 if (REGNO (operands
[0]) == REGNO (base
))
25885 output_asm_insn ("ldr\t%H0, %2", operands
);
25886 output_asm_insn ("ldr\t%0, %1", operands
);
25890 output_asm_insn ("ldr\t%0, %1", operands
);
25891 output_asm_insn ("ldr\t%H0, %2", operands
);
25897 /* With no registers to worry about we can just load the value
25899 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25901 output_asm_insn ("ldr\t%H0, %2", operands
);
25902 output_asm_insn ("ldr\t%0, %1", operands
);
25906 gcc_unreachable ();
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);
      if (REGNO (operands[5]) > REGNO (operands[6]))
	std::swap (operands[5], operands[6]);
      if (REGNO (operands[4]) > REGNO (operands[5]))
	std::swap (operands[4], operands[5]);

      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
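/* Note: the std::swap calls above sort the scratch registers into ascending
   order, since ldmia/stmia register lists must name registers from lowest to
   highest.  */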
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
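/* The labels referenced by the "bl" above are emitted by arm_file_end, which
   places a "bx <reg>" trampoline for each used register at the end of the
   text section.  */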
25975 /* Routines for generating rtl. */
25977 thumb_expand_movmemqi (rtx
*operands
)
25979 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
25980 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
25981 HOST_WIDE_INT len
= INTVAL (operands
[2]);
25982 HOST_WIDE_INT offset
= 0;
25986 emit_insn (gen_movmem12b (out
, in
, out
, in
));
25992 emit_insn (gen_movmem8b (out
, in
, out
, in
));
25998 rtx reg
= gen_reg_rtx (SImode
);
25999 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
26000 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
26007 rtx reg
= gen_reg_rtx (HImode
);
26008 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
26009 plus_constant (Pmode
, in
,
26011 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
26020 rtx reg
= gen_reg_rtx (QImode
);
26021 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
26022 plus_constant (Pmode
, in
,
26024 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
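/* Illustrative example (not from the original source): a name whose prefix
   encoding contains '*' is printed verbatim once the prefix is stripped,
   while any other name is printed through "%U%s" so that the target's
   user-label prefix is applied.  */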
/* This function is used to emit an EABI tag and its associated value.
   We emit the numerical value of the tag in case the assembler does not
   support textual tags.  (Eg gas prior to 2.20).  If requested we include
   the tag name in a comment so that anyone reading the assembler output
   will know which tag is being set.

   This function is not static because arm-c.c needs it too.  */
void
arm_emit_eabi_attribute (const char *name, int num, int val)
{
  asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
  if (flag_verbose_asm || flag_debug_asm)
    asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
  asm_fprintf (asm_out_file, "\n");
}
26099 /* This function is used to print CPU tuning information as comment
26100 in assembler file. Pointers are not printed for now. */
26103 arm_print_tune_info (void)
26105 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
26106 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
26107 current_tune
->constant_limit
);
26108 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26109 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
26110 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26111 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
26112 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26113 "prefetch.l1_cache_size:\t%d\n",
26114 current_tune
->prefetch
.l1_cache_size
);
26115 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26116 "prefetch.l1_cache_line_size:\t%d\n",
26117 current_tune
->prefetch
.l1_cache_line_size
);
26118 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26119 "prefer_constant_pool:\t%d\n",
26120 (int) current_tune
->prefer_constant_pool
);
26121 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26122 "branch_cost:\t(s:speed, p:predictable)\n");
26123 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
26124 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
26125 current_tune
->branch_cost (false, false));
26126 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
26127 current_tune
->branch_cost (false, true));
26128 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
26129 current_tune
->branch_cost (true, false));
26130 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
26131 current_tune
->branch_cost (true, true));
26132 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26133 "prefer_ldrd_strd:\t%d\n",
26134 (int) current_tune
->prefer_ldrd_strd
);
26135 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26136 "logical_op_non_short_circuit:\t[%d,%d]\n",
26137 (int) current_tune
->logical_op_non_short_circuit_thumb
,
26138 (int) current_tune
->logical_op_non_short_circuit_arm
);
26139 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26140 "prefer_neon_for_64bits:\t%d\n",
26141 (int) current_tune
->prefer_neon_for_64bits
);
26142 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26143 "disparage_flag_setting_t16_encodings:\t%d\n",
26144 (int) current_tune
->disparage_flag_setting_t16_encodings
);
26145 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26146 "string_ops_prefer_neon:\t%d\n",
26147 (int) current_tune
->string_ops_prefer_neon
);
26148 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26149 "max_insns_inline_memset:\t%d\n",
26150 current_tune
->max_insns_inline_memset
);
26151 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
26152 current_tune
->fusible_ops
);
26153 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
26154 (int) current_tune
->sched_autopref
);
26157 /* Print .arch and .arch_extension directives corresponding to the
26158 current architecture configuration. */
26160 arm_print_asm_arch_directives ()
26162 const arch_option
*arch
26163 = arm_parse_arch_option_name (all_architectures
, "-march",
26164 arm_active_target
.arch_name
);
26165 auto_sbitmap
opt_bits (isa_num_bits
);
26169 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_active_target
.arch_name
);
26170 if (!arch
->common
.extensions
)
26173 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
26179 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
26181 /* If every feature bit of this option is set in the target
26182 ISA specification, print out the option name. However,
26183 don't print anything if all the bits are part of the
26184 FPU specification. */
26185 if (bitmap_subset_p (opt_bits
, arm_active_target
.isa
)
26186 && !bitmap_subset_p (opt_bits
, isa_all_fpubits
))
26187 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", opt
->name
);
26193 arm_file_start (void)
26199 /* We don't have a specified CPU. Use the architecture to
26202 Note: it might be better to do this unconditionally, then the
26203 assembler would not need to know about all new CPU names as
26205 if (!arm_active_target
.core_name
)
26207 /* armv7ve doesn't support any extensions. */
26208 if (strcmp (arm_active_target
.arch_name
, "armv7ve") == 0)
/* Keep backward compatibility for assemblers
   which don't support armv7ve.  */
26212 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26213 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26214 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26215 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26216 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
26219 arm_print_asm_arch_directives ();
26221 else if (strncmp (arm_active_target
.core_name
, "generic", 7) == 0)
26222 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26223 arm_active_target
.core_name
+ 8);
26226 const char* truncated_name
26227 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
26228 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26231 if (print_tune_info
)
26232 arm_print_tune_info ();
26234 if (! TARGET_SOFT_FLOAT
)
26236 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26237 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26239 if (TARGET_HARD_FLOAT_ABI
)
26240 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26243 /* Some of these attributes only apply when the corresponding features
26244 are used. However we don't have any easy way of figuring this out.
26245 Conservatively record the setting that would have been used. */
26247 if (flag_rounding_math
)
26248 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26250 if (!flag_unsafe_math_optimizations
)
26252 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26253 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26255 if (flag_signaling_nans
)
26256 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26258 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26259 flag_finite_math_only
? 1 : 3);
26261 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26262 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26263 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26264 flag_short_enums
? 1 : 2);
26266 /* Tag_ABI_optimization_goals. */
26269 else if (optimize
>= 2)
26275 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26277 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26280 if (arm_fp16_format
)
26281 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26282 (int) arm_fp16_format
);
26284 if (arm_lang_output_object_attributes_hook
)
26285 arm_lang_output_object_attributes_hook();
26288 default_file_start ();
26292 arm_file_end (void)
26296 if (NEED_INDICATE_EXEC_STACK
)
26297 /* Add .note.GNU-stack. */
26298 file_end_indicate_exec_stack ();
26300 if (! thumb_call_reg_needed
)
26303 switch_to_section (text_section
);
26304 asm_fprintf (asm_out_file
, "\t.code 16\n");
26305 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26307 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26309 rtx label
= thumb_call_via_label
[regno
];
26313 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26314 CODE_LABEL_NUMBER (label
));
26315 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */
static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
26349 /* Output code to add DELTA to the first argument, and then jump
26350 to FUNCTION. Used for C++ multiple inheritance. */
26353 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26354 HOST_WIDE_INT
, tree function
)
26356 static int thunk_label
= 0;
26359 int mi_delta
= delta
;
26360 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26362 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26365 mi_delta
= - mi_delta
;
26367 final_start_function (emit_barrier (), file
, 1);
26371 int labelno
= thunk_label
++;
26372 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26373 /* Thunks are entered in arm mode when available. */
26374 if (TARGET_THUMB1_ONLY
)
26376 /* push r3 so we can use it as a temporary. */
26377 /* TODO: Omit this save if r3 is not used. */
26378 fputs ("\tpush {r3}\n", file
);
26379 fputs ("\tldr\tr3, ", file
);
26383 fputs ("\tldr\tr12, ", file
);
26385 assemble_name (file
, label
);
26386 fputc ('\n', file
);
26389 /* If we are generating PIC, the ldr instruction below loads
26390 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26391 the address of the add + 8, so we have:
26393 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26396 Note that we have "+ 1" because some versions of GNU ld
26397 don't set the low bit of the result for R_ARM_REL32
26398 relocations against thumb function symbols.
26399 On ARMv6M this is +4, not +8. */
26400 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26401 assemble_name (file
, labelpc
);
26402 fputs (":\n", file
);
26403 if (TARGET_THUMB1_ONLY
)
26405 /* This is 2 insns after the start of the thunk, so we know it
26406 is 4-byte aligned. */
26407 fputs ("\tadd\tr3, pc, r3\n", file
);
26408 fputs ("\tmov r12, r3\n", file
);
26411 fputs ("\tadd\tr12, pc, r12\n", file
);
26413 else if (TARGET_THUMB1_ONLY
)
26414 fputs ("\tmov r12, r3\n", file
);
26416 if (TARGET_THUMB1_ONLY
)
26418 if (mi_delta
> 255)
26420 fputs ("\tldr\tr3, ", file
);
26421 assemble_name (file
, label
);
26422 fputs ("+4\n", file
);
26423 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26424 mi_op
, this_regno
, this_regno
);
26426 else if (mi_delta
!= 0)
26428 /* Thumb1 unified syntax requires s suffix in instruction name when
26429 one of the operands is immediate. */
26430 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26431 mi_op
, this_regno
, this_regno
,
26437 /* TODO: Use movw/movt for large constants when available. */
26438 while (mi_delta
!= 0)
26440 if ((mi_delta
& (3 << shift
)) == 0)
26444 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26445 mi_op
, this_regno
, this_regno
,
26446 mi_delta
& (0xff << shift
));
26447 mi_delta
&= ~(0xff << shift
);
26454 if (TARGET_THUMB1_ONLY
)
26455 fputs ("\tpop\t{r3}\n", file
);
26457 fprintf (file
, "\tbx\tr12\n");
26458 ASM_OUTPUT_ALIGN (file
, 2);
26459 assemble_name (file
, label
);
26460 fputs (":\n", file
);
26463 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26464 rtx tem
= XEXP (DECL_RTL (function
), 0);
26465 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26466 pipeline offset is four rather than eight. Adjust the offset
26468 tem
= plus_constant (GET_MODE (tem
), tem
,
26469 TARGET_THUMB1_ONLY
? -3 : -7);
26470 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26472 gen_rtx_SYMBOL_REF (Pmode
,
26473 ggc_strdup (labelpc
)));
26474 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26477 /* Output ".word .LTHUNKn". */
26478 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26480 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26481 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26485 fputs ("\tb\t", file
);
26486 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26487 if (NEED_PLT_RELOC
)
26488 fputs ("(PLT)", file
);
26489 fputc ('\n', file
);
26492 final_end_function ();
26495 /* MI thunk handling for TARGET_32BIT. */
26498 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26499 HOST_WIDE_INT vcall_offset
, tree function
)
26501 /* On ARM, this_regno is R0 or R1 depending on
26502 whether the function returns an aggregate or not.
26504 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26506 ? R1_REGNUM
: R0_REGNUM
);
26508 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26509 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26510 reload_completed
= 1;
26511 emit_note (NOTE_INSN_PROLOGUE_END
);
26513 /* Add DELTA to THIS_RTX. */
26515 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26516 delta
, this_rtx
, this_rtx
, false);
26518 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26519 if (vcall_offset
!= 0)
26521 /* Load *THIS_RTX. */
26522 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26523 /* Compute *THIS_RTX + VCALL_OFFSET. */
26524 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26526 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26527 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26528 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26531 /* Generate a tail call to the target function. */
26532 if (!TREE_USED (function
))
26534 assemble_external (function
);
26535 TREE_USED (function
) = 1;
26537 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26538 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26539 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26540 SIBLING_CALL_P (insn
) = 1;
26542 insn
= get_insns ();
26543 shorten_branches (insn
);
26544 final_start_function (insn
, file
, 1);
26545 final (insn
, file
, 1);
26546 final_end_function ();
26548 /* Stop pretending this is a post-reload pass. */
26549 reload_completed
= 0;
26552 /* Output code to add DELTA to the first argument, and then jump
26553 to FUNCTION. Used for C++ multiple inheritance. */
26556 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
26557 HOST_WIDE_INT vcall_offset
, tree function
)
26560 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26562 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26566 arm_emit_vector_const (FILE *file
, rtx x
)
26569 const char * pattern
;
26571 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26573 switch (GET_MODE (x
))
26575 case E_V2SImode
: pattern
= "%08x"; break;
26576 case E_V4HImode
: pattern
= "%04x"; break;
26577 case E_V8QImode
: pattern
= "%02x"; break;
26578 default: gcc_unreachable ();
26581 fprintf (file
, "0x");
26582 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26586 element
= CONST_VECTOR_ELT (x
, i
);
26587 fprintf (file
, pattern
, INTVAL (element
));
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  long bits;

  bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
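/* For example, on a little-endian target the two bytes of the HFmode value
   are emitted first, followed by two bytes of zero padding, so the 4-byte
   constant pool slot loaded by ldr holds the half-precision bits in its low
   half; big-endian targets emit the padding first instead.  */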
26609 arm_output_load_gr (rtx
*operands
)
26616 if (!MEM_P (operands
[1])
26617 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26618 || !REG_P (reg
= XEXP (sum
, 0))
26619 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26620 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26621 return "wldrw%?\t%0, %1";
26623 /* Fix up an out-of-range load of a GR register. */
26624 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26625 wcgr
= operands
[0];
26627 output_asm_insn ("ldr%?\t%0, %1", operands
);
26629 operands
[0] = wcgr
;
26631 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26632 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26637 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26639 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26640 named arg and all anonymous args onto the stack.
26641 XXX I know the prologue shouldn't be pushing registers, but it is faster
26645 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26649 int second_time ATTRIBUTE_UNUSED
)
26651 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26654 cfun
->machine
->uses_anonymous_args
= 1;
26655 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26657 nregs
= pcum
->aapcs_ncrn
;
26660 int res
= arm_needs_doubleword_align (mode
, type
);
26661 if (res
< 0 && warn_psabi
)
26662 inform (input_location
, "parameter passing for argument of "
26663 "type %qT changed in GCC 7.1", type
);
26669 nregs
= pcum
->nregs
;
26671 if (nregs
< NUM_ARG_REGS
)
26672 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */
static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

static bool
arm_default_short_enums (void)
{
  return ARM_DEFAULT_SHORT_ENUMS;
}

/* AAPCS requires that anonymous bitfields affect structure alignment.  */
static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}
26715 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26718 arm_cxx_guard_type (void)
26720 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26724 /* The EABI says test the least significant bit of a guard variable. */
26727 arm_cxx_guard_mask_bit (void)
26729 return TARGET_AAPCS_BASED
;
26733 /* The EABI specifies that all array cookies are 8 bytes long. */
26736 arm_get_cookie_size (tree type
)
26740 if (!TARGET_AAPCS_BASED
)
26741 return default_cxx_get_cookie_size (type
);
26743 size
= build_int_cst (sizetype
, 8);
26748 /* The EABI says that array cookies should also contain the element size. */
26751 arm_cookie_has_size (void)
26753 return TARGET_AAPCS_BASED
;
26757 /* The EABI says constructors and destructors should return a pointer to
26758 the object constructed/destroyed. */
26761 arm_cxx_cdtor_returns_this (void)
26763 return TARGET_AAPCS_BASED
;
26766 /* The EABI says that an inline function may never be the key
26770 arm_cxx_key_method_may_be_inline (void)
26772 return !TARGET_AAPCS_BASED
;
26776 arm_cxx_determine_class_data_visibility (tree decl
)
26778 if (!TARGET_AAPCS_BASED
26779 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26782 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26783 is exported. However, on systems without dynamic vague linkage,
26784 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26785 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26786 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26788 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26789 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26793 arm_cxx_class_data_always_comdat (void)
26795 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26796 vague linkage if the class has no key function. */
26797 return !TARGET_AAPCS_BASED
;
26801 /* The EABI says __aeabi_atexit should be used to register static
26805 arm_cxx_use_aeabi_atexit (void)
26807 return TARGET_AAPCS_BASED
;
26812 arm_set_return_address (rtx source
, rtx scratch
)
26814 arm_stack_offsets
*offsets
;
26815 HOST_WIDE_INT delta
;
26817 unsigned long saved_regs
;
26819 offsets
= arm_get_frame_offsets ();
26820 saved_regs
= offsets
->saved_regs_mask
;
26822 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26823 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26826 if (frame_pointer_needed
)
26827 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26830 /* LR will be the first saved register. */
26831 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26836 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26837 GEN_INT (delta
& ~4095)));
26842 addr
= stack_pointer_rtx
;
26844 addr
= plus_constant (Pmode
, addr
, delta
);
26846 /* The store needs to be marked as frame related in order to prevent
26847 DSE from deleting it as dead if it is based on fp. */
26848 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26849 RTX_FRAME_RELATED_P (insn
) = 1;
26850 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26856 thumb_set_return_address (rtx source
, rtx scratch
)
26858 arm_stack_offsets
*offsets
;
26859 HOST_WIDE_INT delta
;
26860 HOST_WIDE_INT limit
;
26863 unsigned long mask
;
26867 offsets
= arm_get_frame_offsets ();
26868 mask
= offsets
->saved_regs_mask
;
26869 if (mask
& (1 << LR_REGNUM
))
26872 /* Find the saved regs. */
26873 if (frame_pointer_needed
)
26875 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26876 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26882 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26885 /* Allow for the stack frame. */
26886 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26888 /* The link register is always the first saved register. */
26891 /* Construct the address. */
26892 addr
= gen_rtx_REG (SImode
, reg
);
26895 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26896 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26900 addr
= plus_constant (Pmode
, addr
, delta
);
26902 /* The store needs to be marked as frame related in order to prevent
26903 DSE from deleting it as dead if it is based on fp. */
26904 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26905 RTX_FRAME_RELATED_P (insn
) = 1;
26906 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26909 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
      || mode == V2DImode || mode == V8HFmode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
      || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
      || mode == V2HAmode))
    return true;

  return false;
}
26936 /* Implements target hook array_mode_supported_p. */
26939 arm_array_mode_supported_p (machine_mode mode
,
26940 unsigned HOST_WIDE_INT nelems
)
26943 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26944 && (nelems
>= 2 && nelems
<= 4))
/* Use the option -mvectorize-with-neon-double to override the use of quadword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */
26954 static machine_mode
26955 arm_preferred_simd_mode (scalar_mode mode
)
26961 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26963 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26965 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26967 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26969 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26976 if (TARGET_REALLY_IWMMXT
)
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on Thumb-1.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't have
   enough left over to do doubleword arithmetic.  For Thumb-2 all the
   potentially problematic instructions accept high registers so this is not
   necessary.  Care needs to be taken to avoid adding new Thumb-2 patterns
   that require many low registers.  */
static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB1 && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.S or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */
static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
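/* For example, an SImode shift by 257 may be treated as a shift by 1
   (257 & 255), whereas for DImode the zero mask means no truncation of the
   shift count may be assumed.  */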
/* Map internal gcc register numbers to DWARF2 register numbers.  */
unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  return DWARF_FRAME_REGISTERS;
}
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
27059 arm_dwarf_register_span (rtx rtl
)
27067 regno
= REGNO (rtl
);
27068 if (!IS_VFP_REGNUM (regno
))
27071 /* XXX FIXME: The EABI defines two VFP register ranges:
27072 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27074 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27075 corresponding D register. Until GDB supports this, we shall use the
27076 legacy encodings. We also use these encodings for D0-D15 for
27077 compatibility with older debuggers. */
27078 mode
= GET_MODE (rtl
);
27079 if (GET_MODE_SIZE (mode
) < 8)
27082 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27084 nregs
= GET_MODE_SIZE (mode
) / 4;
27085 for (i
= 0; i
< nregs
; i
+= 2)
27086 if (TARGET_BIG_END
)
27088 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27089 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
27093 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
27094 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27099 nregs
= GET_MODE_SIZE (mode
) / 8;
27100 for (i
= 0; i
< nregs
; i
++)
27101 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
27104 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
27107 #if ARM_UNWIND_INFO
27108 /* Emit unwind directives for a store-multiple instruction or stack pointer
27109 push during alignment.
27110 These should only ever be generated by the function prologue code, so
27111 expect them to have a particular form.
27112 The store-multiple instruction sometimes pushes pc as the last register,
27113 although it should not be tracked into unwind information, or for -Os
27114 sometimes pushes some dummy registers before first register that needs
27115 to be tracked in unwind information; such dummy registers are there just
27116 to avoid separate stack adjustment, and will not be restored in the
27120 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
27123 HOST_WIDE_INT offset
;
27124 HOST_WIDE_INT nregs
;
27128 unsigned padfirst
= 0, padlast
= 0;
27131 e
= XVECEXP (p
, 0, 0);
27132 gcc_assert (GET_CODE (e
) == SET
);
27134 /* First insn will adjust the stack pointer. */
27135 gcc_assert (GET_CODE (e
) == SET
27136 && REG_P (SET_DEST (e
))
27137 && REGNO (SET_DEST (e
)) == SP_REGNUM
27138 && GET_CODE (SET_SRC (e
)) == PLUS
);
27140 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
27141 nregs
= XVECLEN (p
, 0) - 1;
27142 gcc_assert (nregs
);
27144 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
27147 /* For -Os dummy registers can be pushed at the beginning to
27148 avoid separate stack pointer adjustment. */
27149 e
= XVECEXP (p
, 0, 1);
27150 e
= XEXP (SET_DEST (e
), 0);
27151 if (GET_CODE (e
) == PLUS
)
27152 padfirst
= INTVAL (XEXP (e
, 1));
27153 gcc_assert (padfirst
== 0 || optimize_size
);
27154 /* The function prologue may also push pc, but not annotate it as it is
27155 never restored. We turn this into a stack pointer adjustment. */
27156 e
= XVECEXP (p
, 0, nregs
);
27157 e
= XEXP (SET_DEST (e
), 0);
27158 if (GET_CODE (e
) == PLUS
)
27159 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27161 padlast
= offset
- 4;
27162 gcc_assert (padlast
== 0 || padlast
== 4);
27164 fprintf (asm_out_file
, "\t.pad #4\n");
27166 fprintf (asm_out_file
, "\t.save {");
27168 else if (IS_VFP_REGNUM (reg
))
27171 fprintf (asm_out_file
, "\t.vsave {");
27174 /* Unknown register type. */
27175 gcc_unreachable ();
27177 /* If the stack increment doesn't match the size of the saved registers,
27178 something has gone horribly wrong. */
27179 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27183 /* The remaining insns will describe the stores. */
27184 for (i
= 1; i
<= nregs
; i
++)
27186 /* Expect (set (mem <addr>) (reg)).
27187 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27188 e
= XVECEXP (p
, 0, i
);
27189 gcc_assert (GET_CODE (e
) == SET
27190 && MEM_P (SET_DEST (e
))
27191 && REG_P (SET_SRC (e
)));
27193 reg
= REGNO (SET_SRC (e
));
27194 gcc_assert (reg
>= lastreg
);
27197 fprintf (asm_out_file
, ", ");
27198 /* We can't use %r for vfp because we need to use the
27199 double precision register names. */
27200 if (IS_VFP_REGNUM (reg
))
27201 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27203 asm_fprintf (asm_out_file
, "%r", reg
);
27207 /* Check that the addresses are consecutive. */
27208 e
= XEXP (SET_DEST (e
), 0);
27209 if (GET_CODE (e
) == PLUS
)
27210 gcc_assert (REG_P (XEXP (e
, 0))
27211 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27212 && CONST_INT_P (XEXP (e
, 1))
27213 && offset
== INTVAL (XEXP (e
, 1)));
27217 && REGNO (e
) == SP_REGNUM
);
27218 offset
+= reg_size
;
27221 fprintf (asm_out_file
, "}\n");
27223 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
27226 /* Emit unwind directives for a SET. */
27229 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27237 switch (GET_CODE (e0
))
27240 /* Pushing a single register. */
27241 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27242 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27243 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27246 asm_fprintf (asm_out_file
, "\t.save ");
27247 if (IS_VFP_REGNUM (REGNO (e1
)))
27248 asm_fprintf(asm_out_file
, "{d%d}\n",
27249 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27251 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27255 if (REGNO (e0
) == SP_REGNUM
)
27257 /* A stack increment. */
27258 if (GET_CODE (e1
) != PLUS
27259 || !REG_P (XEXP (e1
, 0))
27260 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27261 || !CONST_INT_P (XEXP (e1
, 1)))
27264 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27265 -INTVAL (XEXP (e1
, 1)));
27267 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27269 HOST_WIDE_INT offset
;
27271 if (GET_CODE (e1
) == PLUS
)
27273 if (!REG_P (XEXP (e1
, 0))
27274 || !CONST_INT_P (XEXP (e1
, 1)))
27276 reg
= REGNO (XEXP (e1
, 0));
27277 offset
= INTVAL (XEXP (e1
, 1));
27278 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27279 HARD_FRAME_POINTER_REGNUM
, reg
,
27282 else if (REG_P (e1
))
27285 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27286 HARD_FRAME_POINTER_REGNUM
, reg
);
27291 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27293 /* Move from sp to reg. */
27294 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27296 else if (GET_CODE (e1
) == PLUS
27297 && REG_P (XEXP (e1
, 0))
27298 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27299 && CONST_INT_P (XEXP (e1
, 1)))
27301 /* Set reg to offset from sp. */
27302 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27303 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27315 /* Emit unwind directives for the given insn. */
27318 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27321 bool handled_one
= false;
27323 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27326 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27327 && (TREE_NOTHROW (current_function_decl
)
27328 || crtl
->all_throwers_are_sibcalls
))
27331 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27334 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27336 switch (REG_NOTE_KIND (note
))
27338 case REG_FRAME_RELATED_EXPR
:
27339 pat
= XEXP (note
, 0);
27342 case REG_CFA_REGISTER
:
27343 pat
= XEXP (note
, 0);
27346 pat
= PATTERN (insn
);
27347 if (GET_CODE (pat
) == PARALLEL
)
27348 pat
= XVECEXP (pat
, 0, 0);
27351 /* Only emitted for IS_STACKALIGN re-alignment. */
27356 src
= SET_SRC (pat
);
27357 dest
= SET_DEST (pat
);
27359 gcc_assert (src
== stack_pointer_rtx
);
27360 reg
= REGNO (dest
);
27361 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27364 handled_one
= true;
27367 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27368 to get correct dwarf information for shrink-wrap. We should not
27369 emit unwind information for it because these are used either for
27370 pretend arguments or notes to adjust sp and restore registers from
27372 case REG_CFA_DEF_CFA
:
27373 case REG_CFA_ADJUST_CFA
:
27374 case REG_CFA_RESTORE
:
27377 case REG_CFA_EXPRESSION
:
27378 case REG_CFA_OFFSET
:
27379 /* ??? Only handling here what we actually emit. */
27380 gcc_unreachable ();
27388 pat
= PATTERN (insn
);
27391 switch (GET_CODE (pat
))
27394 arm_unwind_emit_set (asm_out_file
, pat
);
27398 /* Store multiple. */
27399 arm_unwind_emit_sequence (asm_out_file
, pat
);
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */
static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (!CONST_INT_P (x))
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return true;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}
#endif /* ARM_UNWIND_INFO */
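/* Illustrative example (not from the original source): for a catch of a
   class type the table entry comes out as
       .word   _ZTI7MyClass(TARGET2)
   (MyClass is a hypothetical name), so the linker applies an R_ARM_TARGET2
   relocation to the type_info reference.  */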
27436 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27439 arm_asm_init_sections (void)
27441 #if ARM_UNWIND_INFO
27442 exception_section
= get_unnamed_section (0, output_section_asm_op
,
27444 #endif /* ARM_UNWIND_INFO */
27446 #ifdef OBJECT_FORMAT_ELF
27447 if (target_pure_code
)
27448 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
/* Output unwind directives for the start/end of a function.  */
void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
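/* Taken together, a function body is thus bracketed by ".fnstart" at the
   start of the prologue and ".fnend" at the end of the epilogue, with
   ".cantunwind" inserted before ".fnend" for functions that can never be
   unwound.  */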
27477 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27479 enum tls_reloc reloc
;
27482 val
= XVECEXP (x
, 0, 0);
27483 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27485 output_addr_const (fp
, val
);
27490 fputs ("(tlsgd)", fp
);
27493 fputs ("(tlsldm)", fp
);
27496 fputs ("(tlsldo)", fp
);
27499 fputs ("(gottpoff)", fp
);
27502 fputs ("(tpoff)", fp
);
27505 fputs ("(tlsdesc)", fp
);
27508 gcc_unreachable ();
27517 fputs (" + (. - ", fp
);
27518 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27519 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27520 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27521 output_addr_const (fp
, XVECEXP (x
, 0, 3));
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
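/* The "(tlsldo)" decoration is the same local-dynamic offset annotation
   produced by arm_emit_tls_decoration above.  */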
27542 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27545 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27547 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27548 return arm_emit_tls_decoration (fp
, x
);
27549 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27552 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27554 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27555 assemble_name_raw (fp
, label
);
27559 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27561 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27565 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27569 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27571 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27575 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27579 else if (GET_CODE (x
) == CONST_VECTOR
)
27580 return arm_emit_vector_const (fp
, x
);
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift(rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  shift = shift_op(operands[3], &val);
  if (shift)
    {
      if (val != -1)
	operands[2] = GEN_INT(val);
      sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1", c);

  output_asm_insn (pattern, operands);
  return "";
}
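/* The '?', '.' and '!' characters spliced into the template select how the
   condition-code behaviour is printed; they are output punctuation handled
   by the ARM operand-printing machinery rather than literal assembler
   text.  */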
27614 /* Output assembly for a WMMX immediate shift instruction. */
27616 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27618 int shift
= INTVAL (operands
[2]);
27620 machine_mode opmode
= GET_MODE (operands
[0]);
27622 gcc_assert (shift
>= 0);
27624 /* If the shift value in the register versions is > 63 (for D qualifier),
27625 31 (for W qualifier) or 15 (for H qualifier). */
27626 if (((opmode
== V4HImode
) && (shift
> 15))
27627 || ((opmode
== V2SImode
) && (shift
> 31))
27628 || ((opmode
== DImode
) && (shift
> 63)))
27632 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27633 output_asm_insn (templ
, operands
);
27634 if (opmode
== DImode
)
27636 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27637 output_asm_insn (templ
, operands
);
27642 /* The destination register will contain all zeros. */
27643 sprintf (templ
, "wzero\t%%0");
27644 output_asm_insn (templ
, operands
);
27649 if ((opmode
== DImode
) && (shift
> 32))
27651 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27652 output_asm_insn (templ
, operands
);
27653 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27654 output_asm_insn (templ
, operands
);
27658 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27659 output_asm_insn (templ
, operands
);
27664 /* Output assembly for a WMMX tinsr instruction. */
27666 arm_output_iwmmxt_tinsr (rtx
*operands
)
27668 int mask
= INTVAL (operands
[3]);
27671 int units
= mode_nunits
[GET_MODE (operands
[0])];
27672 gcc_assert ((mask
& (mask
- 1)) == 0);
27673 for (i
= 0; i
< units
; ++i
)
27675 if ((mask
& 0x01) == 1)
27681 gcc_assert (i
< units
);
27683 switch (GET_MODE (operands
[0]))
27686 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27689 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27692 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27695 gcc_unreachable ();
27698 output_asm_insn (templ
, operands
);
/* Output a Thumb-1 casesi dispatch sequence.  */

const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned
	      ? "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned
	      ? "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case E_SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}
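/* Illustrative note (not part of the original source): the
   __gnu_thumb1_case_* routines named above live in libgcc; the table of
   case offsets is placed immediately after the "bl", and the helper
   uses the switch index (in r0 for these helpers) to pick an entry and
   adjust the return address so execution resumes at the selected case.
   That is why only the element mode and signedness of the
   ADDR_DIFF_VEC need to be inspected here.  */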
/* Output a Thumb-2 casesi instruction.  */

const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return "tbb\t[%|pc, %0]";
    case E_HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case E_SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
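/* Illustrative example (not part of the original source): for a QImode
   dispatch table the routine above emits roughly

	cmp	r0, r1		@ index against the table bound
	bhi	.Ldefault	@ out of range, take the default label
	tbb	[pc, r0]	@ byte offset table follows

   i.e. an unsigned bounds check followed by a table branch; HImode
   tables use "tbh [pc, r0, lsl #1]" and SImode tables fall back to the
   adr/ldr sequence shown in the code.  Register and label names are
   placeholders.  */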
/* Implement TARGET_SCHED_ISSUE_RATE.  Lookup the issue rate in the
   per-core tuning structs.  */

static int
arm_issue_rate (void)
{
  return current_tune->issue_rate;
}

/* Return how many instructions the scheduler should look ahead to choose
   the best one.  */

static int
arm_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = arm_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Enable modeling of L2 auto-prefetcher.  */

static int
arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
static const char *
arm_mangle_type (const_tree type)
{
  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
     builtin type.  */
  if (TYPE_NAME (type) != NULL)
    return arm_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
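/* Illustrative example (not part of the original source): on an AAPCS
   target a declaration such as "void f (__builtin_va_list);" mangles as
   "_Z1fSt9__va_list", i.e. as if va_list were std::__va_list, which is
   what the "St9__va_list" string above produces.  */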
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;

  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
27831 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27834 arm_frame_pointer_required (void)
27836 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27839 /* If the function receives nonlocal gotos, it needs to save the frame
27840 pointer in the nonlocal_goto_save_area object. */
27841 if (cfun
->has_nonlocal_label
)
27844 /* The frame pointer is required for non-leaf APCS frames. */
27845 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
27848 /* If we are probing the stack in the prologue, we will have a faulting
27849 instruction prior to the stack adjustment and this requires a frame
27850 pointer if we want to catch the exception using the EABI unwinder. */
27851 if (!IS_INTERRUPT (arm_current_func_type ())
27852 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27853 && arm_except_unwind_info (&global_options
) == UI_TARGET
27854 && cfun
->can_throw_non_call_exceptions
)
27856 HOST_WIDE_INT size
= get_frame_size ();
27858 /* That's irrelevant if there is no stack adjustment. */
27862 /* That's relevant only if there is a stack probe. */
27863 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27865 /* We don't have the final size of the frame so adjust. */
27866 size
+= 32 * UNITS_PER_WORD
;
27867 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
27877 /* Only thumb1 can't support conditional execution, so return true if
27878 the target is not thumb1. */
27880 arm_have_conditional_execution (void)
27882 return !TARGET_THUMB1
;
27885 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27886 static HOST_WIDE_INT
27887 arm_vector_alignment (const_tree type
)
27889 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27891 if (TARGET_AAPCS_BASED
)
27892 align
= MIN (align
, 64);
27897 static unsigned int
27898 arm_autovectorize_vector_sizes (void)
27900 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27904 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27906 /* Vectors which aren't in packed structures will not be less aligned than
27907 the natural alignment of their element type, so this is safe. */
27908 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27911 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27915 arm_builtin_support_vector_misalignment (machine_mode mode
,
27916 const_tree type
, int misalignment
,
27919 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27921 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27926 /* If the misalignment is unknown, we should be able to handle the access
27927 so long as it is not to a member of a packed data structure. */
27928 if (misalignment
== -1)
      /* Return true if the misalignment is a multiple of the natural
	 alignment of the vector's element type.  This is probably always
	 going to be true in practice, since we've already established
	 that this isn't a packed access.  */
27935 return ((misalignment
% align
) == 0);
27938 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27943 arm_conditional_register_usage (void)
27947 if (TARGET_THUMB1
&& optimize_size
)
27949 /* When optimizing for size on Thumb-1, it's better not
27950 to use the HI regs, because of the overhead of
27952 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27953 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27956 /* The link register can be clobbered by any branch insn,
27957 but we have no way to track that at present, so mark
27958 it as unavailable. */
27960 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27962 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
27964 /* VFPv3 registers are disabled when earlier VFP
27965 versions are selected due to the definition of
27966 LAST_VFP_REGNUM. */
27967 for (regno
= FIRST_VFP_REGNUM
;
27968 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27970 fixed_regs
[regno
] = 0;
27971 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27972 || regno
>= FIRST_VFP_REGNUM
+ 32;
27976 if (TARGET_REALLY_IWMMXT
)
27978 regno
= FIRST_IWMMXT_GR_REGNUM
;
27979 /* The 2002/10/09 revision of the XScale ABI has wCG0
27980 and wCG1 as call-preserved registers. The 2002/11/21
27981 revision changed this so that all wCG registers are
27982 scratch registers. */
27983 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27984 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27985 fixed_regs
[regno
] = 0;
27986 /* The XScale ABI has wR0 - wR9 as scratch registers,
27987 the rest as call-preserved registers. */
27988 for (regno
= FIRST_IWMMXT_REGNUM
;
27989 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27991 fixed_regs
[regno
] = 0;
27992 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27996 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27998 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27999 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28001 else if (TARGET_APCS_STACK
)
28003 fixed_regs
[10] = 1;
28004 call_used_regs
[10] = 1;
28006 /* -mcaller-super-interworking reserves r11 for calls to
28007 _interwork_r11_call_via_rN(). Making the register global
28008 is an easy way of ensuring that it remains valid for all
28010 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
28011 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
28013 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28014 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28015 if (TARGET_CALLER_INTERWORKING
)
28016 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28018 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28022 arm_preferred_rename_class (reg_class_t rclass
)
28024 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28025 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
28026 and code size can be reduced. */
28027 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
28033 /* Compute the attribute "length" of insn "*push_multi".
28034 So this function MUST be kept in sync with that insn pattern. */
28036 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
28038 int i
, regno
, hi_reg
;
28039 int num_saves
= XVECLEN (parallel_op
, 0);
28049 regno
= REGNO (first_op
);
  /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the
     register list is 8-bit.  Normally this means all registers in the list
     must be LO_REGS, that is (R0-R7).  If any HI_REGS is used, then we must
     use 32-bit encodings.  There is one exception for PUSH: LR in HI_REGS
     can still be used with the 16-bit encoding.  */
28055 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28056 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
28058 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
28059 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
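/* Illustrative example (not part of the original source): under Thumb-2,
   "push {r4, r5, r6, lr}" uses only low registers plus LR and so gets the
   2-byte encoding, while "push {r4, r8}" contains a high register and
   needs the 4-byte encoding; the length computed above reflects that
   difference.  */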
28067 /* Compute the attribute "length" of insn. Currently, this function is used
28068 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28069 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28070 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
28071 true if OPERANDS contains insn which explicit updates base register. */
28074 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
28083 rtx parallel_op
= operands
[0];
28084 /* Initialize to elements number of PARALLEL. */
28085 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
28086 /* Initialize the value to base register. */
28087 unsigned regno
= REGNO (operands
[1]);
28088 /* Skip return and write back pattern.
28089 We only need register pop pattern for later analysis. */
28090 unsigned first_indx
= 0;
28091 first_indx
+= return_pc
? 1 : 0;
28092 first_indx
+= write_back_p
? 1 : 0;
  /* A pop operation can be done through LDM or POP.  If the base register
     is SP and write-back is used, then LDM is an alias of POP.  */
28096 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
28097 bool ldm_p
= !pop_p
;
28099 /* Check base register for LDM. */
28100 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
28103 /* Check each register in the list. */
28104 for (; indx
>= first_indx
; indx
--)
28106 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
28107 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28108 comment in arm_attr_length_push_multi. */
28109 if (REGNO_REG_CLASS (regno
) == HI_REGS
28110 && (regno
!= PC_REGNUM
|| ldm_p
))
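/* Illustrative example (not part of the original source): "pop {r4, pc}"
   has SP as the base with write-back, so it qualifies as POP and can use
   the 16-bit encoding even though PC is a high register, whereas
   "ldm r8!, {r4, r5}" has a high base register and must use the 32-bit
   encoding.  The length returned above is based on this distinction.  */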
/* Compute the number of instructions emitted by output_move_double.  */

int
arm_count_output_move_double_insns (rtx *operands)
{
  int count;
  rtx ops[2];

  /* output_move_double may modify the operands array, so call it
     here on a copy of the array.  */
  ops[0] = operands[0];
  ops[1] = operands[1];
  output_move_double (ops, false, &count);
  return count;
}
28132 vfp3_const_double_for_fract_bits (rtx operand
)
28134 REAL_VALUE_TYPE r0
;
28136 if (!CONST_DOUBLE_P (operand
))
28139 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
28140 if (exact_real_inverse (DFmode
, &r0
)
28141 && !REAL_VALUE_NEGATIVE (r0
))
28143 if (exact_real_truncate (DFmode
, &r0
))
28145 HOST_WIDE_INT value
= real_to_integer (&r0
);
28146 value
= value
& 0xffffffff;
28147 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28149 int ret
= exact_log2 (value
);
28150 gcc_assert (IN_RANGE (ret
, 0, 31));
28158 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28159 log2 is in [1, 32], return that log2. Otherwise return -1.
28160 This is used in the patterns for vcvt.s32.f32 floating-point to
28161 fixed-point conversions. */
28164 vfp3_const_double_for_bits (rtx x
)
28166 const REAL_VALUE_TYPE
*r
;
28168 if (!CONST_DOUBLE_P (x
))
28171 r
= CONST_DOUBLE_REAL_VALUE (x
);
28173 if (REAL_VALUE_NEGATIVE (*r
)
28174 || REAL_VALUE_ISNAN (*r
)
28175 || REAL_VALUE_ISINF (*r
)
28176 || !real_isinteger (r
, SFmode
))
28179 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
28181 /* The exact_log2 above will have returned -1 if this is
28182 not an exact log2. */
28183 if (!IN_RANGE (hwint
, 1, 32))
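/* Illustrative example (not part of the original source): for the
   constant 0.125 the exact reciprocal is 8.0, so
   vfp3_const_double_for_fract_bits computes ret == 3 (0.125 == 1/2^3),
   which is the #fbits immediate used by the fixed-point vcvt patterns;
   values whose reciprocal is not a power of two are rejected.  */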
/* Emit a memory barrier around an atomic sequence according to MODEL.  */

static void
arm_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}

static void
arm_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
28206 /* Emit the load-exclusive and store-exclusive instructions.
28207 Use acquire and release versions if necessary. */
28210 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28212 rtx (*gen
) (rtx
, rtx
);
28218 case E_QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28219 case E_HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28220 case E_SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28221 case E_DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28223 gcc_unreachable ();
28230 case E_QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28231 case E_HImode
: gen
= gen_arm_load_exclusivehi
; break;
28232 case E_SImode
: gen
= gen_arm_load_exclusivesi
; break;
28233 case E_DImode
: gen
= gen_arm_load_exclusivedi
; break;
28235 gcc_unreachable ();
28239 emit_insn (gen (rval
, mem
));
28243 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28246 rtx (*gen
) (rtx
, rtx
, rtx
);
28252 case E_QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28253 case E_HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28254 case E_SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28255 case E_DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28257 gcc_unreachable ();
28264 case E_QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28265 case E_HImode
: gen
= gen_arm_store_exclusivehi
; break;
28266 case E_SImode
: gen
= gen_arm_store_exclusivesi
; break;
28267 case E_DImode
: gen
= gen_arm_store_exclusivedi
; break;
28269 gcc_unreachable ();
28273 emit_insn (gen (bval
, rval
, mem
));
28276 /* Mark the previous jump instruction as unlikely. */
28279 emit_unlikely_jump (rtx insn
)
28281 rtx_insn
*jump
= emit_jump_insn (insn
);
28282 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
28285 /* Expand a compare and swap pattern. */
28288 arm_expand_compare_and_swap (rtx operands
[])
28290 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28292 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28294 bval
= operands
[0];
28295 rval
= operands
[1];
28297 oldval
= operands
[3];
28298 newval
= operands
[4];
28299 is_weak
= operands
[5];
28300 mod_s
= operands
[6];
28301 mod_f
= operands
[7];
28302 mode
= GET_MODE (mem
);
28304 /* Normally the succ memory model must be stronger than fail, but in the
28305 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28306 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28308 if (TARGET_HAVE_LDACQ
28309 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28310 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28311 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28317 /* For narrow modes, we're going to perform the comparison in SImode,
28318 so do the zero-extension now. */
28319 rval
= gen_reg_rtx (SImode
);
28320 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28324 /* Force the value into a register if needed. We waited until after
28325 the zero-extension above to do this properly. */
28326 if (!arm_add_operand (oldval
, SImode
))
28327 oldval
= force_reg (SImode
, oldval
);
28331 if (!cmpdi_operand (oldval
, mode
))
28332 oldval
= force_reg (mode
, oldval
);
28336 gcc_unreachable ();
28343 case E_QImode
: gen
= gen_atomic_compare_and_swapt1qi_1
; break;
28344 case E_HImode
: gen
= gen_atomic_compare_and_swapt1hi_1
; break;
28345 case E_SImode
: gen
= gen_atomic_compare_and_swapt1si_1
; break;
28346 case E_DImode
: gen
= gen_atomic_compare_and_swapt1di_1
; break;
28348 gcc_unreachable ();
28355 case E_QImode
: gen
= gen_atomic_compare_and_swap32qi_1
; break;
28356 case E_HImode
: gen
= gen_atomic_compare_and_swap32hi_1
; break;
28357 case E_SImode
: gen
= gen_atomic_compare_and_swap32si_1
; break;
28358 case E_DImode
: gen
= gen_atomic_compare_and_swap32di_1
; break;
28360 gcc_unreachable ();
28364 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
28365 emit_insn (gen (bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28367 if (mode
== QImode
|| mode
== HImode
)
28368 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
  /* In all cases, we arrange for success to be signaled by Z set.
     This arrangement allows for the boolean result to be used directly
     in a subsequent branch, post optimization.  For Thumb-1 targets, the
     boolean negation of the result is also stored in bval because the
     Thumb-1 backend lacks dependency tracking for the CC flag: flag
     setting is not represented at the RTL level.  */
28377 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
28380 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
28381 emit_insn (gen_rtx_SET (bval
, x
));
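/* Illustrative example (not part of the original source): for a 32-bit
   strong compare-and-swap the splitter below typically expands to a loop
   of the general shape

   .Lretry:
	ldrex	r0, [r1]
	cmp	r0, r2
	bne	.Lno_match
	strex	r3, r4, [r1]
	cmp	r3, #0
	bne	.Lretry
   .Lno_match:

   with acquire/release variants (ldaex/stlex) or explicit barriers
   inserted according to the memory model; register and label names are
   placeholders.  */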
28385 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28386 another memory store between the load-exclusive and store-exclusive can
28387 reset the monitor from Exclusive to Open state. This means we must wait
28388 until after reload to split the pattern, lest we get a register spill in
28389 the middle of the atomic sequence. Success of the compare and swap is
28390 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28391 for Thumb-1 targets (ie. negation of the boolean value returned by
28392 atomic_compare_and_swapmode standard pattern in operand 0). */
28395 arm_split_compare_and_swap (rtx operands
[])
28397 rtx rval
, mem
, oldval
, newval
, neg_bval
;
28399 enum memmodel mod_s
, mod_f
;
28401 rtx_code_label
*label1
, *label2
;
28404 rval
= operands
[1];
28406 oldval
= operands
[3];
28407 newval
= operands
[4];
28408 is_weak
= (operands
[5] != const0_rtx
);
28409 mod_s
= memmodel_from_int (INTVAL (operands
[6]));
28410 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
28411 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
28412 mode
= GET_MODE (mem
);
28414 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28416 bool use_acquire
= TARGET_HAVE_LDACQ
28417 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28418 || is_mm_release (mod_s
));
28420 bool use_release
= TARGET_HAVE_LDACQ
28421 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28422 || is_mm_acquire (mod_s
));
28424 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28425 a full barrier is emitted after the store-release. */
28427 use_acquire
= false;
28429 /* Checks whether a barrier is needed and emits one accordingly. */
28430 if (!(use_acquire
|| use_release
))
28431 arm_pre_atomic_barrier (mod_s
);
28436 label1
= gen_label_rtx ();
28437 emit_label (label1
);
28439 label2
= gen_label_rtx ();
28441 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28443 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28444 as required to communicate with arm_expand_compare_and_swap. */
28447 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
28448 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28449 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28450 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28451 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28455 emit_move_insn (neg_bval
, const1_rtx
);
28456 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
28457 if (thumb1_cmpneg_operand (oldval
, SImode
))
28458 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
28461 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
28464 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
28466 /* Weak or strong, we want EQ to be true for success, so that we
28467 match the flags that we got from the compare above. */
28470 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28471 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
28472 emit_insn (gen_rtx_SET (cond
, x
));
28477 /* Z is set to boolean value of !neg_bval, as required to communicate
28478 with arm_expand_compare_and_swap. */
28479 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
28480 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
28483 if (!is_mm_relaxed (mod_f
))
28484 emit_label (label2
);
28486 /* Checks whether a barrier is needed and emits one accordingly. */
28488 || !(use_acquire
|| use_release
))
28489 arm_post_atomic_barrier (mod_s
);
28491 if (is_mm_relaxed (mod_f
))
28492 emit_label (label2
);
28495 /* Split an atomic operation pattern. Operation is given by CODE and is one
28496 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28497 operation). Operation is performed on the content at MEM and on VALUE
28498 following the memory model MODEL_RTX. The content at MEM before and after
28499 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28500 success of the operation is returned in COND. Using a scratch register or
28501 an operand register for these determines what result is returned for that
28505 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28506 rtx value
, rtx model_rtx
, rtx cond
)
28508 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28509 machine_mode mode
= GET_MODE (mem
);
28510 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28511 rtx_code_label
*label
;
28512 bool all_low_regs
, bind_old_new
;
28515 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28517 bool use_acquire
= TARGET_HAVE_LDACQ
28518 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28519 || is_mm_release (model
));
28521 bool use_release
= TARGET_HAVE_LDACQ
28522 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28523 || is_mm_acquire (model
));
28525 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28526 a full barrier is emitted after the store-release. */
28528 use_acquire
= false;
28530 /* Checks whether a barrier is needed and emits one accordingly. */
28531 if (!(use_acquire
|| use_release
))
28532 arm_pre_atomic_barrier (model
);
28534 label
= gen_label_rtx ();
28535 emit_label (label
);
28538 new_out
= gen_lowpart (wmode
, new_out
);
28540 old_out
= gen_lowpart (wmode
, old_out
);
28543 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28545 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28547 /* Does the operation require destination and first operand to use the same
28548 register? This is decided by register constraints of relevant insn
28549 patterns in thumb1.md. */
28550 gcc_assert (!new_out
|| REG_P (new_out
));
28551 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
28552 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
28553 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
28558 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
28560 /* We want to return the old value while putting the result of the operation
28561 in the same register as the old value so copy the old value over to the
28562 destination register and use that register for the operation. */
28563 if (old_out
&& bind_old_new
)
28565 emit_move_insn (new_out
, old_out
);
28576 x
= gen_rtx_AND (wmode
, old_out
, value
);
28577 emit_insn (gen_rtx_SET (new_out
, x
));
28578 x
= gen_rtx_NOT (wmode
, new_out
);
28579 emit_insn (gen_rtx_SET (new_out
, x
));
28583 if (CONST_INT_P (value
))
28585 value
= GEN_INT (-INTVAL (value
));
28591 if (mode
== DImode
)
28593 /* DImode plus/minus need to clobber flags. */
28594 /* The adddi3 and subdi3 patterns are incorrectly written so that
28595 they require matching operands, even when we could easily support
28596 three operands. Thankfully, this can be fixed up post-splitting,
28597 as the individual add+adc patterns do accept three operands and
28598 post-reload cprop can make these moves go away. */
28599 emit_move_insn (new_out
, old_out
);
28601 x
= gen_adddi3 (new_out
, new_out
, value
);
28603 x
= gen_subdi3 (new_out
, new_out
, value
);
28610 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28611 emit_insn (gen_rtx_SET (new_out
, x
));
28615 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28618 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28619 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28621 /* Checks whether a barrier is needed and emits one accordingly. */
28623 || !(use_acquire
|| use_release
))
28624 arm_post_atomic_barrier (model
);
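/* Illustrative example (not part of the original source): for an
   __atomic_fetch_add on an SImode location the splitter above produces
   a loop of the general shape

   .Lretry:
	ldrex	r0, [r2]
	add	r1, r0, r3
	strex	ip, r1, [r2]
	cmp	ip, #0
	bne	.Lretry

   where r0 plays the role of OLD_OUT, r1 of NEW_OUT and ip of the COND
   scratch; barriers or ldaex/stlex forms are added depending on the
   memory model.  Register names are placeholders.  */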
28627 #define MAX_VECT_LEN 16
28629 struct expand_vec_perm_d
28631 rtx target
, op0
, op1
;
28632 unsigned char perm
[MAX_VECT_LEN
];
28633 machine_mode vmode
;
28634 unsigned char nelt
;
28639 /* Generate a variable permutation. */
28642 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28644 machine_mode vmode
= GET_MODE (target
);
28645 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28647 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28648 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28649 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28650 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28651 gcc_checking_assert (TARGET_NEON
);
28655 if (vmode
== V8QImode
)
28656 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28658 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28664 if (vmode
== V8QImode
)
28666 pair
= gen_reg_rtx (V16QImode
);
28667 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28668 pair
= gen_lowpart (TImode
, pair
);
28669 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28673 pair
= gen_reg_rtx (OImode
);
28674 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28675 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28681 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28683 machine_mode vmode
= GET_MODE (target
);
28684 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
28685 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28686 rtx rmask
[MAX_VECT_LEN
], mask
;
28688 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28689 numbering of elements for big-endian, we must reverse the order. */
28690 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28692 /* The VTBL instruction does not use a modulo index, so we must take care
28693 of that ourselves. */
28694 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28695 for (i
= 0; i
< nelt
; ++i
)
28697 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
28698 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28700 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
/* Map lane ordering between architectural lane order, and GCC lane order,
   taking into account ABI.  See comment above output_move_neon for details.  */

static int
neon_endian_lane_map (machine_mode mode, int lane)
{
  if (BYTES_BIG_ENDIAN)
    {
      int nelems = GET_MODE_NUNITS (mode);
      /* Reverse lane order.  */
      lane = (nelems - 1 - lane);
      /* Reverse D register order, to match ABI.  */
      if (GET_MODE_SIZE (mode) == 16)
	lane = lane ^ (nelems / 2);
    }
  return lane;
}

/* Some permutations index into pairs of vectors, this is a helper function
   to map indexes into those pairs of vectors.  */

static int
neon_pair_endian_lane_map (machine_mode mode, int lane)
{
  int nelem = GET_MODE_NUNITS (mode);
  if (BYTES_BIG_ENDIAN)
    lane =
      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
  return lane;
}
28734 /* Generate or test for an insn that supports a constant permutation. */
28736 /* Recognize patterns for the VUZP insns. */
28739 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28741 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28742 rtx out0
, out1
, in0
, in1
;
28743 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28747 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28750 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28751 big endian pattern on 64 bit vectors, so we correct for that. */
28752 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28753 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
28755 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28757 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28759 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28763 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28765 for (i
= 0; i
< nelt
; i
++)
28768 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28769 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
28779 case E_V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28780 case E_V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28781 case E_V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28782 case E_V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28783 case E_V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
28784 case E_V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
28785 case E_V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28786 case E_V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28787 case E_V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28788 case E_V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28790 gcc_unreachable ();
28795 if (swap_nelt
!= 0)
28796 std::swap (in0
, in1
);
28799 out1
= gen_reg_rtx (d
->vmode
);
28801 std::swap (out0
, out1
);
28803 emit_insn (gen (out0
, in0
, in1
, out1
));
28807 /* Recognize patterns for the VZIP insns. */
28810 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28812 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
28813 rtx out0
, out1
, in0
, in1
;
28814 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28818 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28821 is_swapped
= BYTES_BIG_ENDIAN
;
28823 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
28826 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
28828 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28832 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28834 for (i
= 0; i
< nelt
/ 2; i
++)
28837 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
28838 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
28842 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
28843 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
28854 case E_V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28855 case E_V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28856 case E_V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28857 case E_V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28858 case E_V8HFmode
: gen
= gen_neon_vzipv8hf_internal
; break;
28859 case E_V4HFmode
: gen
= gen_neon_vzipv4hf_internal
; break;
28860 case E_V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28861 case E_V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28862 case E_V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28863 case E_V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28865 gcc_unreachable ();
28871 std::swap (in0
, in1
);
28874 out1
= gen_reg_rtx (d
->vmode
);
28876 std::swap (out0
, out1
);
28878 emit_insn (gen (out0
, in0
, in1
, out1
));
28882 /* Recognize patterns for the VREV insns. */
28885 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
28887 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
28888 rtx (*gen
)(rtx
, rtx
);
28890 if (!d
->one_vector_p
)
28899 case E_V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28900 case E_V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28908 case E_V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28909 case E_V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28910 case E_V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28911 case E_V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28912 case E_V8HFmode
: gen
= gen_neon_vrev64v8hf
; break;
28913 case E_V4HFmode
: gen
= gen_neon_vrev64v4hf
; break;
28921 case E_V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
28922 case E_V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
28923 case E_V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
28924 case E_V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
28925 case E_V4SImode
: gen
= gen_neon_vrev64v4si
; break;
28926 case E_V2SImode
: gen
= gen_neon_vrev64v2si
; break;
28927 case E_V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
28928 case E_V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
28937 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
28938 for (j
= 0; j
<= diff
; j
+= 1)
28940 /* This is guaranteed to be true as the value of diff
28941 is 7, 3, 1 and we should have enough elements in the
28942 queue to generate this. Getting a vector mask with a
28943 value of diff other than these values implies that
28944 something is wrong by the time we get here. */
28945 gcc_assert (i
+ j
< nelt
);
28946 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
28954 emit_insn (gen (d
->target
, d
->op0
));
28958 /* Recognize patterns for the VTRN insns. */
28961 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
28963 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28964 rtx out0
, out1
, in0
, in1
;
28965 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28967 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28970 /* Note that these are little-endian tests. Adjust for big-endian later. */
28971 if (d
->perm
[0] == 0)
28973 else if (d
->perm
[0] == 1)
28977 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28979 for (i
= 0; i
< nelt
; i
+= 2)
28981 if (d
->perm
[i
] != i
+ odd
)
28983 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28993 case E_V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28994 case E_V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28995 case E_V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28996 case E_V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28997 case E_V8HFmode
: gen
= gen_neon_vtrnv8hf_internal
; break;
28998 case E_V4HFmode
: gen
= gen_neon_vtrnv4hf_internal
; break;
28999 case E_V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
29000 case E_V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
29001 case E_V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
29002 case E_V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
29004 gcc_unreachable ();
29009 if (BYTES_BIG_ENDIAN
)
29011 std::swap (in0
, in1
);
29016 out1
= gen_reg_rtx (d
->vmode
);
29018 std::swap (out0
, out1
);
29020 emit_insn (gen (out0
, in0
, in1
, out1
));
29024 /* Recognize patterns for the VEXT insns. */
29027 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
29029 unsigned int i
, nelt
= d
->nelt
;
29030 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
29033 unsigned int location
;
29035 unsigned int next
= d
->perm
[0] + 1;
29037 /* TODO: Handle GCC's numbering of elements for big-endian. */
29038 if (BYTES_BIG_ENDIAN
)
29041 /* Check if the extracted indexes are increasing by one. */
29042 for (i
= 1; i
< nelt
; next
++, i
++)
29044 /* If we hit the most significant element of the 2nd vector in
29045 the previous iteration, no need to test further. */
29046 if (next
== 2 * nelt
)
29049 /* If we are operating on only one vector: it could be a
29050 rotation. If there are only two elements of size < 64, let
29051 arm_evpc_neon_vrev catch it. */
29052 if (d
->one_vector_p
&& (next
== nelt
))
29054 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
29060 if (d
->perm
[i
] != next
)
29064 location
= d
->perm
[0];
29068 case E_V16QImode
: gen
= gen_neon_vextv16qi
; break;
29069 case E_V8QImode
: gen
= gen_neon_vextv8qi
; break;
29070 case E_V4HImode
: gen
= gen_neon_vextv4hi
; break;
29071 case E_V8HImode
: gen
= gen_neon_vextv8hi
; break;
29072 case E_V2SImode
: gen
= gen_neon_vextv2si
; break;
29073 case E_V4SImode
: gen
= gen_neon_vextv4si
; break;
29074 case E_V4HFmode
: gen
= gen_neon_vextv4hf
; break;
29075 case E_V8HFmode
: gen
= gen_neon_vextv8hf
; break;
29076 case E_V2SFmode
: gen
= gen_neon_vextv2sf
; break;
29077 case E_V4SFmode
: gen
= gen_neon_vextv4sf
; break;
29078 case E_V2DImode
: gen
= gen_neon_vextv2di
; break;
29087 offset
= GEN_INT (location
);
29088 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
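/* Illustrative example (not part of the original source): a V4SI
   permutation selecting elements {1, 2, 3, 4} from the two input vectors
   is a rotation across the operand pair, so the code above recognizes it
   with location == 1 and emits something like "vext.32 q0, q1, q2, #1",
   taking the last three elements of op0 followed by the first element of
   op1.  Register numbers here are placeholders.  */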
/* The NEON VTBL instruction is a fully variable permutation that's even
   stronger than what we expose via VEC_PERM_EXPR.  What it doesn't do
   is mask the index operand as VEC_PERM_EXPR requires.  Therefore we
   can do slightly better by expanding this as a constant where we don't
   have to apply a mask.  */
29099 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
29101 rtx rperm
[MAX_VECT_LEN
], sel
;
29102 machine_mode vmode
= d
->vmode
;
29103 unsigned int i
, nelt
= d
->nelt
;
29105 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29106 numbering of elements for big-endian, we must reverse the order. */
29107 if (BYTES_BIG_ENDIAN
)
29113 /* Generic code will try constant permutation twice. Once with the
29114 original mode and again with the elements lowered to QImode.
29115 So wait and don't do the selector expansion ourselves. */
29116 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
29119 for (i
= 0; i
< nelt
; ++i
)
29120 rperm
[i
] = GEN_INT (d
->perm
[i
]);
29121 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
29122 sel
= force_reg (vmode
, sel
);
29124 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
29129 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
29131 /* Check if the input mask matches vext before reordering the
29134 if (arm_evpc_neon_vext (d
))
29137 /* The pattern matching functions above are written to look for a small
29138 number to begin the sequence (0, 1, N/2). If we begin with an index
29139 from the second operand, we can swap the operands. */
29140 if (d
->perm
[0] >= d
->nelt
)
29142 unsigned i
, nelt
= d
->nelt
;
29144 for (i
= 0; i
< nelt
; ++i
)
29145 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
29147 std::swap (d
->op0
, d
->op1
);
29152 if (arm_evpc_neon_vuzp (d
))
29154 if (arm_evpc_neon_vzip (d
))
29156 if (arm_evpc_neon_vrev (d
))
29158 if (arm_evpc_neon_vtrn (d
))
29160 return arm_evpc_neon_vtbl (d
);
29165 /* Expand a vec_perm_const pattern. */
29168 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29170 struct expand_vec_perm_d d
;
29171 int i
, nelt
, which
;
29177 d
.vmode
= GET_MODE (target
);
29178 gcc_assert (VECTOR_MODE_P (d
.vmode
));
29179 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29180 d
.testing_p
= false;
29182 for (i
= which
= 0; i
< nelt
; ++i
)
29184 rtx e
= XVECEXP (sel
, 0, i
);
29185 int ei
= INTVAL (e
) & (2 * nelt
- 1);
29186 which
|= (ei
< nelt
? 1 : 2);
29196 d
.one_vector_p
= false;
29197 if (!rtx_equal_p (op0
, op1
))
29200 /* The elements of PERM do not suggest that only the first operand
29201 is used, but both operands are identical. Allow easier matching
29202 of the permutation by folding the permutation into the single
29206 for (i
= 0; i
< nelt
; ++i
)
29207 d
.perm
[i
] &= nelt
- 1;
29209 d
.one_vector_p
= true;
29214 d
.one_vector_p
= true;
29218 return arm_expand_vec_perm_const_1 (&d
);
29221 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29224 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
29225 const unsigned char *sel
)
29227 struct expand_vec_perm_d d
;
29228 unsigned int i
, nelt
, which
;
29232 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29233 d
.testing_p
= true;
29234 memcpy (d
.perm
, sel
, nelt
);
29236 /* Categorize the set of elements in the selector. */
29237 for (i
= which
= 0; i
< nelt
; ++i
)
29239 unsigned char e
= d
.perm
[i
];
29240 gcc_assert (e
< 2 * nelt
);
29241 which
|= (e
< nelt
? 1 : 2);
29244 /* For all elements from second vector, fold the elements to first. */
29246 for (i
= 0; i
< nelt
; ++i
)
29249 /* Check whether the mask can be applied to the vector type. */
29250 d
.one_vector_p
= (which
!= 3);
29252 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
29253 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
29254 if (!d
.one_vector_p
)
29255 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
29258 ret
= arm_expand_vec_perm_const_1 (&d
);
29265 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
29267 /* If we are soft float and we do not have ldrd
29268 then all auto increment forms are ok. */
29269 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
29274 /* Post increment and Pre Decrement are supported for all
29275 instruction forms except for vector forms. */
29278 if (VECTOR_MODE_P (mode
))
29280 if (code
!= ARM_PRE_DEC
)
29290 /* Without LDRD and mode size greater than
29291 word size, there is no point in auto-incrementing
29292 because ldm and stm will not have these forms. */
29293 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29296 /* Vector and floating point modes do not support
29297 these auto increment forms. */
29298 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29311 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29312 on ARM, since we know that shifts by negative amounts are no-ops.
29313 Additionally, the default expansion code is not available or suitable
29314 for post-reload insn splits (this can occur when the register allocator
29315 chooses not to do a shift in NEON).
29317 This function is used in both initial expand and post-reload splits, and
29318 handles all kinds of 64-bit shifts.
29320 Input requirements:
29321 - It is safe for the input and output to be the same register, but
29322 early-clobber rules apply for the shift amount and scratch registers.
29323 - Shift by register requires both scratch registers. In all other cases
29324 the scratch registers may be NULL.
29325 - Ashiftrt by a register also clobbers the CC register. */
29327 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29328 rtx amount
, rtx scratch1
, rtx scratch2
)
29330 rtx out_high
= gen_highpart (SImode
, out
);
29331 rtx out_low
= gen_lowpart (SImode
, out
);
29332 rtx in_high
= gen_highpart (SImode
, in
);
29333 rtx in_low
= gen_lowpart (SImode
, in
);
29336 in = the register pair containing the input value.
29337 out = the destination register pair.
29338 up = the high- or low-part of each pair.
29339 down = the opposite part to "up".
29340 In a shift, we can consider bits to shift from "up"-stream to
29341 "down"-stream, so in a left-shift "up" is the low-part and "down"
29342 is the high-part of each register pair. */
29344 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29345 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29346 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29347 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29349 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29351 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29352 && GET_MODE (out
) == DImode
);
29354 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29355 && GET_MODE (in
) == DImode
);
29357 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29358 && GET_MODE (amount
) == SImode
)
29359 || CONST_INT_P (amount
)));
29360 gcc_assert (scratch1
== NULL
29361 || (GET_CODE (scratch1
) == SCRATCH
)
29362 || (GET_MODE (scratch1
) == SImode
29363 && REG_P (scratch1
)));
29364 gcc_assert (scratch2
== NULL
29365 || (GET_CODE (scratch2
) == SCRATCH
)
29366 || (GET_MODE (scratch2
) == SImode
29367 && REG_P (scratch2
)));
29368 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29369 || !HARD_REGISTER_P (out
)
29370 || (REGNO (out
) != REGNO (amount
)
29371 && REGNO (out
) + 1 != REGNO (amount
)));
29373 /* Macros to make following code more readable. */
29374 #define SUB_32(DEST,SRC) \
29375 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29376 #define RSB_32(DEST,SRC) \
29377 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29378 #define SUB_S_32(DEST,SRC) \
29379 gen_addsi3_compare0 ((DEST), (SRC), \
29381 #define SET(DEST,SRC) \
29382 gen_rtx_SET ((DEST), (SRC))
29383 #define SHIFT(CODE,SRC,AMOUNT) \
29384 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29385 #define LSHIFT(CODE,SRC,AMOUNT) \
29386 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29387 SImode, (SRC), (AMOUNT))
29388 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29389 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29390 SImode, (SRC), (AMOUNT))
29392 gen_rtx_IOR (SImode, (A), (B))
29393 #define BRANCH(COND,LABEL) \
29394 gen_arm_cond_branch ((LABEL), \
29395 gen_rtx_ ## COND (CCmode, cc_reg, \
29399 /* Shifts by register and shifts by constant are handled separately. */
29400 if (CONST_INT_P (amount
))
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result that an ARM instruction
	 in a shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined
	 behavior", in any case.  */
29410 if (INTVAL (amount
) <= 0)
29411 emit_insn (gen_movdi (out
, in
));
29412 else if (INTVAL (amount
) >= 64)
29414 if (code
== ASHIFTRT
)
29416 rtx const31_rtx
= GEN_INT (31);
29417 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29418 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29421 emit_insn (gen_movdi (out
, const0_rtx
));
29424 /* Now handle valid shifts. */
29425 else if (INTVAL (amount
) < 32)
29427 /* Shifts by a constant less than 32. */
29428 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29430 /* Clearing the out register in DImode first avoids lots
29431 of spilling and results in less stack usage.
29432 Later this redundant insn is completely removed.
29433 Do that only if "in" and "out" are different registers. */
29434 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29435 emit_insn (SET (out
, const0_rtx
));
29436 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29437 emit_insn (SET (out_down
,
29438 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29440 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29444 /* Shifts by a constant greater than 31. */
29445 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29447 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29448 emit_insn (SET (out
, const0_rtx
));
29449 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29450 if (code
== ASHIFTRT
)
29451 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29454 emit_insn (SET (out_up
, const0_rtx
));
29459 /* We have a shift-by-register. */
29460 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29462 /* This alternative requires the scratch registers. */
29463 gcc_assert (scratch1
&& REG_P (scratch1
));
29464 gcc_assert (scratch2
&& REG_P (scratch2
));
29466 /* We will need the values "amount-32" and "32-amount" later.
29467 Swapping them around now allows the later code to be more general. */
29471 emit_insn (SUB_32 (scratch1
, amount
));
29472 emit_insn (RSB_32 (scratch2
, amount
));
29475 emit_insn (RSB_32 (scratch1
, amount
));
29476 /* Also set CC = amount > 32. */
29477 emit_insn (SUB_S_32 (scratch2
, amount
));
29480 emit_insn (RSB_32 (scratch1
, amount
));
29481 emit_insn (SUB_32 (scratch2
, amount
));
29484 gcc_unreachable ();
29487 /* Emit code like this:
29490 out_down = in_down << amount;
29491 out_down = (in_up << (amount - 32)) | out_down;
29492 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29493 out_up = in_up << amount;
29496 out_down = in_down >> amount;
29497 out_down = (in_up << (32 - amount)) | out_down;
29499 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29500 out_up = in_up << amount;
29503 out_down = in_down >> amount;
29504 out_down = (in_up << (32 - amount)) | out_down;
29506 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29507 out_up = in_up << amount;
29509 The ARM and Thumb2 variants are the same but implemented slightly
29510 differently. If this were only called during expand we could just
29511 use the Thumb2 case and let combine do the right thing, but this
29512 can also be called from post-reload splitters. */
29514 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29516 if (!TARGET_THUMB2
)
29518 /* Emit code for ARM mode. */
29519 emit_insn (SET (out_down
,
29520 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29521 if (code
== ASHIFTRT
)
29523 rtx_code_label
*done_label
= gen_label_rtx ();
29524 emit_jump_insn (BRANCH (LT
, done_label
));
29525 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29527 emit_label (done_label
);
29530 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29535 /* Emit code for Thumb2 mode.
29536 Thumb2 can't do shift and or in one insn. */
29537 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29538 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29540 if (code
== ASHIFTRT
)
29542 rtx_code_label
*done_label
= gen_label_rtx ();
29543 emit_jump_insn (BRANCH (LT
, done_label
));
29544 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29545 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29546 emit_label (done_label
);
29550 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29551 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29555 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
/* Returns true if the pattern is a valid symbolic address, which is either a
   symbol_ref or (symbol_ref + addend).

   According to the ARM ELF ABI, the initial addend of REL-type relocations
   processing MOVW and MOVT instructions is formed by interpreting the 16-bit
   literal field of the instruction as a 16-bit signed value in the range
   -32768 <= A < 32768.  */

bool
arm_valid_symbolic_address_p (rtx addr)
{
  rtx xop0, xop1 = NULL_RTX;
  rtx tmp = addr;

  if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
    return true;

  /* (const (plus: symbol_ref const_int))  */
  if (GET_CODE (addr) == CONST)
    tmp = XEXP (addr, 0);

  if (GET_CODE (tmp) == PLUS)
    {
      xop0 = XEXP (tmp, 0);
      xop1 = XEXP (tmp, 1);

      if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
	return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
    }

  return false;
}
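/* Illustrative example (not part of the original source): the address
   (const (plus (symbol_ref "foo") (const_int 42))) is accepted because the
   addend fits in the signed 16-bit range required for REL-type MOVW/MOVT
   relocations, while the same expression with an addend of 0x12345 would
   be rejected.  */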
/* Return true if *COMPARISON is a valid comparison operation, and force
   the operands into a form that is valid for it.  */
29605 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29607 enum rtx_code code
= GET_CODE (*comparison
);
29609 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29610 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29612 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29614 if (code
== UNEQ
|| code
== LTGT
)
29617 code_int
= (int)code
;
29618 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29619 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29624 if (!arm_add_operand (*op1
, mode
))
29625 *op1
= force_reg (mode
, *op1
);
29626 if (!arm_add_operand (*op2
, mode
))
29627 *op2
= force_reg (mode
, *op2
);
29631 if (!cmpdi_operand (*op1
, mode
))
29632 *op1
= force_reg (mode
, *op1
);
29633 if (!cmpdi_operand (*op2
, mode
))
29634 *op2
= force_reg (mode
, *op2
);
29638 if (!TARGET_VFP_FP16INST
)
29640 /* FP16 comparisons are done in SF mode. */
29642 *op1
= convert_to_mode (mode
, *op1
, 1);
29643 *op2
= convert_to_mode (mode
, *op2
, 1);
29644 /* Fall through. */
29647 if (!vfp_compare_operand (*op1
, mode
))
29648 *op1
= force_reg (mode
, *op1
);
29649 if (!vfp_compare_operand (*op2
, mode
))
29650 *op2
= force_reg (mode
, *op2
);
29660 /* Maximum number of instructions to set block of memory. */
29662 arm_block_set_max_insns (void)
29664 if (optimize_function_for_size_p (cfun
))
29667 return current_tune
->max_insns_inline_memset
;
29670 /* Return TRUE if it's profitable to set block of memory for
29671 non-vectorized case. VAL is the value to set the memory
29672 with. LENGTH is the number of bytes to set. ALIGN is the
29673 alignment of the destination memory in bytes. UNALIGNED_P
29674 is TRUE if we can only set the memory with instructions
29675 meeting alignment requirements. USE_STRD_P is TRUE if we
29676 can use strd to set the memory. */
29678 arm_block_set_non_vect_profit_p (rtx val
,
29679 unsigned HOST_WIDE_INT length
,
29680 unsigned HOST_WIDE_INT align
,
29681 bool unaligned_p
, bool use_strd_p
)
29684 /* For leftovers in bytes of 0-7, we can set the memory block using
29685 strb/strh/str with minimum instruction number. */
29686 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29690 num
= arm_const_inline_cost (SET
, val
);
29691 num
+= length
/ align
+ length
% align
;
29693 else if (use_strd_p
)
29695 num
= arm_const_double_inline_cost (val
);
29696 num
+= (length
>> 3) + leftover
[length
& 7];
29700 num
= arm_const_inline_cost (SET
, val
);
29701 num
+= (length
>> 2) + leftover
[length
& 3];
29704 /* We may be able to combine last pair STRH/STRB into a single STR
29705 by shifting one byte back. */
29706 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29709 return (num
<= arm_block_set_max_insns ());

/* Return TRUE if it's profitable to set block of memory for
   vectorized case.  LENGTH is the number of bytes to set.
   ALIGN is the alignment of destination memory in bytes.
   MODE is the vector mode used to set the memory.  */
static bool
arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
                             unsigned HOST_WIDE_INT align,
                             machine_mode mode)
{
  int num;
  bool unaligned_p = ((align & 3) != 0);
  unsigned int nelt = GET_MODE_NUNITS (mode);

  /* Instruction loading constant value.  */
  num = 1;
  /* Instructions storing the memory.  */
  num += (length + nelt - 1) / nelt;
  /* Instructions adjusting the address expression.  Only need to
     adjust address expression if it's 4 bytes aligned and bytes
     leftover can only be stored by mis-aligned store instruction.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;

  /* Store the first 16 bytes using vst1:v16qi for the aligned case.  */
  if (!unaligned_p && mode == V16QImode)
    num++;

  return (num <= arm_block_set_max_insns ());
}
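
/* Worked example (illustrative sketch only, kept out of the build): storing
   LENGTH = 30 bytes with 4-byte alignment in V16QImode costs 1 instruction
   to load the constant vector, (30 + 16 - 1) / 16 = 2 vector stores, 1
   address adjustment for the misaligned tail and 1 extra leading vst1:v16qi,
   i.e. 5 instructions in total.  The hypothetical helper below mirrors that
   count with the element count passed in directly.  */
#if 0
static int
model_vect_path_insns (unsigned int length, unsigned int align,
                       unsigned int nelt, int is_v16qi)
{
  int unaligned_p = ((align & 3) != 0);
  int num = 1;                          /* Load of the constant vector.  */

  num += (length + nelt - 1) / nelt;    /* Vector stores.  */
  if (!unaligned_p && (length & 3) != 0)
    num++;                              /* Address adjustment for the tail.  */
  if (!unaligned_p && is_v16qi)
    num++;                              /* Leading vst1:v16qi store.  */
  return num;
}
#endif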
29742 /* Set a block of memory using vectorization instructions for the
29743 unaligned case. We fill the first LENGTH bytes of the memory
29744 area starting from DSTBASE with byte constant VALUE. ALIGN is
29745 the alignment requirement of memory. Return TRUE if succeeded. */
29747 arm_block_set_unaligned_vect (rtx dstbase
,
29748 unsigned HOST_WIDE_INT length
,
29749 unsigned HOST_WIDE_INT value
,
29750 unsigned HOST_WIDE_INT align
)
29752 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
29754 rtx val_elt
, val_vec
, reg
;
29755 rtx rval
[MAX_VECT_LEN
];
29756 rtx (*gen_func
) (rtx
, rtx
);
29758 unsigned HOST_WIDE_INT v
= value
;
29759 unsigned int offset
= 0;
29760 gcc_assert ((align
& 0x3) != 0);
29761 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29762 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29763 if (length
>= nelt_v16
)
29766 gen_func
= gen_movmisalignv16qi
;
29771 gen_func
= gen_movmisalignv8qi
;
29773 nelt_mode
= GET_MODE_NUNITS (mode
);
29774 gcc_assert (length
>= nelt_mode
);
29775 /* Skip if it isn't profitable. */
29776 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29779 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29780 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29782 v
= sext_hwi (v
, BITS_PER_WORD
);
29783 val_elt
= GEN_INT (v
);
29784 for (j
= 0; j
< nelt_mode
; j
++)
29787 reg
= gen_reg_rtx (mode
);
29788 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29789 /* Emit instruction loading the constant value. */
29790 emit_move_insn (reg
, val_vec
);
29792 /* Handle nelt_mode bytes in a vector. */
29793 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29795 emit_insn ((*gen_func
) (mem
, reg
));
29796 if (i
+ 2 * nelt_mode
<= length
)
29798 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
29799 offset
+= nelt_mode
;
29800 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29804 /* If there are not less than nelt_v8 bytes leftover, we must be in
29806 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
29808 /* Handle (8, 16) bytes leftover. */
29809 if (i
+ nelt_v8
< length
)
29811 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
29812 offset
+= length
- i
;
29813 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29815 /* We are shifting bytes back, set the alignment accordingly. */
29816 if ((length
& 1) != 0 && align
>= 2)
29817 set_mem_align (mem
, BITS_PER_UNIT
);
29819 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29821 /* Handle (0, 8] bytes leftover. */
29822 else if (i
< length
&& i
+ nelt_v8
>= length
)
29824 if (mode
== V16QImode
)
29825 reg
= gen_lowpart (V8QImode
, reg
);
29827 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
29828 + (nelt_mode
- nelt_v8
))));
29829 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
29830 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
29832 /* We are shifting bytes back, set the alignment accordingly. */
29833 if ((length
& 1) != 0 && align
>= 2)
29834 set_mem_align (mem
, BITS_PER_UNIT
);
29836 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29842 /* Set a block of memory using vectorization instructions for the
29843 aligned case. We fill the first LENGTH bytes of the memory area
29844 starting from DSTBASE with byte constant VALUE. ALIGN is the
29845 alignment requirement of memory. Return TRUE if succeeded. */
29847 arm_block_set_aligned_vect (rtx dstbase
,
29848 unsigned HOST_WIDE_INT length
,
29849 unsigned HOST_WIDE_INT value
,
29850 unsigned HOST_WIDE_INT align
)
29852 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
29853 rtx dst
, addr
, mem
;
29854 rtx val_elt
, val_vec
, reg
;
29855 rtx rval
[MAX_VECT_LEN
];
29857 unsigned HOST_WIDE_INT v
= value
;
29858 unsigned int offset
= 0;
29860 gcc_assert ((align
& 0x3) == 0);
29861 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29862 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29863 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
29868 nelt_mode
= GET_MODE_NUNITS (mode
);
29869 gcc_assert (length
>= nelt_mode
);
29870 /* Skip if it isn't profitable. */
29871 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29874 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29876 v
= sext_hwi (v
, BITS_PER_WORD
);
29877 val_elt
= GEN_INT (v
);
29878 for (j
= 0; j
< nelt_mode
; j
++)
29881 reg
= gen_reg_rtx (mode
);
29882 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29883 /* Emit instruction loading the constant value. */
29884 emit_move_insn (reg
, val_vec
);
29887 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29888 if (mode
== V16QImode
)
29890 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29891 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29893 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29894 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
29896 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29897 offset
+= length
- nelt_mode
;
29898 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29899 /* We are shifting bytes back, set the alignment accordingly. */
29900 if ((length
& 0x3) == 0)
29901 set_mem_align (mem
, BITS_PER_UNIT
* 4);
29902 else if ((length
& 0x1) == 0)
29903 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29905 set_mem_align (mem
, BITS_PER_UNIT
);
29907 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29910 /* Fall through for bytes leftover. */
29912 nelt_mode
= GET_MODE_NUNITS (mode
);
29913 reg
= gen_lowpart (V8QImode
, reg
);
29916 /* Handle 8 bytes in a vector. */
29917 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29919 addr
= plus_constant (Pmode
, dst
, i
);
29920 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
29921 emit_move_insn (mem
, reg
);
29924 /* Handle single word leftover by shifting 4 bytes back. We can
29925 use aligned access for this case. */
29926 if (i
+ UNITS_PER_WORD
== length
)
29928 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
29929 offset
+= i
- UNITS_PER_WORD
;
29930 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
29931 /* We are shifting 4 bytes back, set the alignment accordingly. */
29932 if (align
> UNITS_PER_WORD
)
29933 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
29935 emit_move_insn (mem
, reg
);
29937 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29938 We have to use unaligned access for this case. */
29939 else if (i
< length
)
29941 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29942 offset
+= length
- nelt_mode
;
29943 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29944 /* We are shifting bytes back, set the alignment accordingly. */
29945 if ((length
& 1) == 0)
29946 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29948 set_mem_align (mem
, BITS_PER_UNIT
);
29950 emit_insn (gen_movmisalignv8qi (mem
, reg
));

/* Set a block of memory using plain strh/strb instructions, only
   using instructions allowed by ALIGN on processor.  We fill the
   first LENGTH bytes of the memory area starting from DSTBASE
   with byte constant VALUE.  ALIGN is the alignment requirement
   of memory.  */
static bool
arm_block_set_unaligned_non_vect (rtx dstbase,
                                  unsigned HOST_WIDE_INT length,
                                  unsigned HOST_WIDE_INT value,
                                  unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  machine_mode mode;
  HOST_WIDE_INT v = value;

  gcc_assert (align == 1 || align == 2);

  if (align == 2)
    v |= (value << BITS_PER_UNIT);

  v = sext_hwi (v, BITS_PER_WORD);
  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, true, false))
    return false;

  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  mode = (align == 2 ? HImode : QImode);
  val_reg = force_reg (SImode, val_exp);
  reg = gen_lowpart (mode, val_reg);

  for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, mode, addr, i);
      emit_move_insn (mem, reg);
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
      i++;
    }

  gcc_assert (i == length);
  return true;
}
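
/* Worked example (illustrative sketch only, kept out of the build): with
   ALIGN == 2 and VALUE == 0xab the constant is widened to 0xabab and
   LENGTH = 7 bytes are written as three HImode strh stores at offsets 0, 2
   and 4 plus one trailing QImode strb at offset 6.  The hypothetical
   host-side model below produces the same byte pattern.  */
#if 0
#include <string.h>

static void
model_unaligned_non_vect_set (unsigned char *dst, unsigned int length,
                              unsigned char value, unsigned int align)
{
  unsigned int i, step = (align == 2 ? 2 : 1);

  for (i = 0; i + step <= length; i += step)
    memset (dst + i, value, step);      /* strh (align == 2) or strb.  */
  if (i + 1 == length)
    dst[i] = value;                     /* Trailing strb.  */
}
#endif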
30011 /* Set a block of memory using plain strd/str/strh/strb instructions,
30012 to permit unaligned copies on processors which support unaligned
30013 semantics for those instructions. We fill the first LENGTH bytes
30014 of the memory area starting from DSTBASE with byte constant VALUE.
30015 ALIGN is the alignment requirement of memory. */
30017 arm_block_set_aligned_non_vect (rtx dstbase
,
30018 unsigned HOST_WIDE_INT length
,
30019 unsigned HOST_WIDE_INT value
,
30020 unsigned HOST_WIDE_INT align
)
30023 rtx dst
, addr
, mem
;
30024 rtx val_exp
, val_reg
, reg
;
30025 unsigned HOST_WIDE_INT v
;
30028 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
30029 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
30031 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
30032 if (length
< UNITS_PER_WORD
)
30033 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
30036 v
|= (v
<< BITS_PER_WORD
);
30038 v
= sext_hwi (v
, BITS_PER_WORD
);
30040 val_exp
= GEN_INT (v
);
30041 /* Skip if it isn't profitable. */
30042 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30043 align
, false, use_strd_p
))
30048 /* Try without strd. */
30049 v
= (v
>> BITS_PER_WORD
);
30050 v
= sext_hwi (v
, BITS_PER_WORD
);
30051 val_exp
= GEN_INT (v
);
30052 use_strd_p
= false;
30053 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30054 align
, false, use_strd_p
))
30059 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30060 /* Handle double words using strd if possible. */
30063 val_reg
= force_reg (DImode
, val_exp
);
30065 for (; (i
+ 8 <= length
); i
+= 8)
30067 addr
= plus_constant (Pmode
, dst
, i
);
30068 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
30069 emit_move_insn (mem
, reg
);
30073 val_reg
= force_reg (SImode
, val_exp
);
30075 /* Handle words. */
30076 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
30077 for (; (i
+ 4 <= length
); i
+= 4)
30079 addr
= plus_constant (Pmode
, dst
, i
);
30080 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
30081 if ((align
& 3) == 0)
30082 emit_move_insn (mem
, reg
);
30084 emit_insn (gen_unaligned_storesi (mem
, reg
));
30087 /* Merge last pair of STRH and STRB into a STR if possible. */
30088 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
30090 addr
= plus_constant (Pmode
, dst
, i
- 1);
30091 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
30092 /* We are shifting one byte back, set the alignment accordingly. */
30093 if ((align
& 1) == 0)
30094 set_mem_align (mem
, BITS_PER_UNIT
);
30096 /* Most likely this is an unaligned access, and we can't tell at
30097 compilation time. */
30098 emit_insn (gen_unaligned_storesi (mem
, reg
));
30102 /* Handle half word leftover. */
30103 if (i
+ 2 <= length
)
30105 reg
= gen_lowpart (HImode
, val_reg
);
30106 addr
= plus_constant (Pmode
, dst
, i
);
30107 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
30108 if ((align
& 1) == 0)
30109 emit_move_insn (mem
, reg
);
30111 emit_insn (gen_unaligned_storehi (mem
, reg
));
30116 /* Handle single byte leftover. */
30117 if (i
+ 1 == length
)
30119 reg
= gen_lowpart (QImode
, val_reg
);
30120 addr
= plus_constant (Pmode
, dst
, i
);
30121 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30122 emit_move_insn (mem
, reg
);

/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with byte constant VALUE.
   ALIGN is the alignment requirement of memory.  */
static bool
arm_block_set_vect (rtx dstbase,
                    unsigned HOST_WIDE_INT length,
                    unsigned HOST_WIDE_INT value,
                    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instruction.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instruction is available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}

/* Expand string store operation.  Firstly we try to do that by using
   vectorization instructions, then try with ARM unaligned access and
   double-word store if profitable.  OPERANDS[0] is the destination,
   OPERANDS[1] is the number of bytes, operands[2] is the value to
   initialize the memory, OPERANDS[3] is the known alignment of the
   destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}
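
/* Usage sketch (illustrative only, kept out of the build): a small
   fixed-size memset such as the hypothetical one below is the kind of
   statement this expansion targets; whether it is actually inlined depends
   on the selected tuning, the known alignment and the optimization level.  */
#if 0
#include <string.h>

void
clear_header (void *p)
{
  memset (p, 0, 32);    /* Candidate for expansion through arm_gen_setmem.  */
}
#endif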

static bool
arm_macro_fusion_p (void)
{
  return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
}

/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
   for MOVW / MOVT macro fusion.  */

static bool
arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
{
  /* We are trying to fuse
     movw imm / movt imm
     instructions as a group that gets scheduled together.  */

  rtx set_dest = SET_DEST (curr_set);

  if (GET_MODE (set_dest) != SImode)
    return false;

  /* We are trying to match:
     prev (movw)  == (set (reg r0) (const_int imm16))
     curr (movt) == (set (zero_extract (reg r0)
                                       (const_int 16)
                                       (const_int 16))
                     (const_int imm16_1))

     or

     prev (movw)  == (set (reg r1)
                          (high (symbol_ref ("SYM"))))
     curr (movt) == (set (reg r0)
                         (lo_sum (reg r1)
                                 (symbol_ref ("SYM"))))  */

  if (GET_CODE (set_dest) == ZERO_EXTRACT)
    {
      if (CONST_INT_P (SET_SRC (curr_set))
          && CONST_INT_P (SET_SRC (prev_set))
          && REG_P (XEXP (set_dest, 0))
          && REG_P (SET_DEST (prev_set))
          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
        return true;
    }
  else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
           && REG_P (SET_DEST (curr_set))
           && REG_P (SET_DEST (prev_set))
           && GET_CODE (SET_SRC (prev_set)) == HIGH
           && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
    return true;

  return false;
}

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
      && aarch_crypto_can_dual_issue (prev, curr))
    return true;

  if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
      && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
    return true;

  return false;
}

/* Return true iff the instruction fusion described by OP is enabled.  */
bool
arm_fusion_enabled_p (tune_params::fuse_ops op)
{
  return current_tune->fusible_ops & op;
}

/* Implement TARGET_SCHED_CAN_SPECULATE_INSN.  Return true if INSN can be
   scheduled for speculative execution.  Reject the long-running division
   and square-root instructions.  */

static bool
arm_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
      case TYPE_SDIV:
      case TYPE_UDIV:
      case TYPE_FDIVS:
      case TYPE_FDIVD:
      case TYPE_FSQRTS:
      case TYPE_FSQRTD:
      case TYPE_NEON_FP_SQRT_S:
      case TYPE_NEON_FP_SQRT_D:
      case TYPE_NEON_FP_SQRT_S_Q:
      case TYPE_NEON_FP_SQRT_D_Q:
      case TYPE_NEON_FP_DIV_S:
      case TYPE_NEON_FP_DIV_D:
      case TYPE_NEON_FP_DIV_S_Q:
      case TYPE_NEON_FP_DIV_D_Q:
        return false;
      default:
        return true;
    }
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return HOST_WIDE_INT_1U << 29;
}

/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where they are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
        {
          decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
          if (decl_op1
              && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
              && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
            {
              if ((VAR_P (decl_op1)
                   || TREE_CODE (decl_op1) == CONST_DECL)
                  && (VAR_P (decl_op0)
                      || TREE_CODE (decl_op0) == CONST_DECL))
                return (get_variable_section (decl_op1, false)
                        != get_variable_section (decl_op0, false));

              if (TREE_CODE (decl_op1) == LABEL_DECL
                  && TREE_CODE (decl_op0) == LABEL_DECL)
                return (DECL_CONTEXT (decl_op1)
                        != DECL_CONTEXT (decl_op0));
            }
        }
      return true;
    }

  return false;
}

/* return TRUE if x is a reference to a value in a constant pool */
bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}

/* Remember the last target of arm_set_current_function.  */
static GTY(()) tree arm_previous_fndecl;

/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.  */

void
save_restore_target_globals (tree new_tree)
{
  /* If we have a previous state, use it.  */
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    {
      /* Call target_reinit and save the state for TARGET_GLOBALS.  */
      TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
    }

  arm_option_params_internal ();
}

/* Invalidate arm_previous_fndecl.  */

void
arm_reset_previous_fndecl (void)
{
  arm_previous_fndecl = NULL_TREE;
}

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */

static void
arm_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == arm_previous_fndecl)
    return;

  tree old_tree = (arm_previous_fndecl
                   ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
                   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but previous one did,
     use the default node.  */
  if (! new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by save_restore_target_globals from
     arm_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  arm_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  save_restore_target_globals (new_tree);
}
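
/* Usage sketch (illustrative only, kept out of the build): the per-function
   target state switched above is what a declaration with the GNU target
   attribute creates, e.g. the hypothetical helper below, which is compiled
   as Thumb regardless of the command-line -marm/-mthumb setting.  */
#if 0
__attribute__ ((target ("thumb")))
int
thumb_helper (int x)
{
  return x + 1;
}
#endif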

/* Implement TARGET_OPTION_PRINT.  */

static void
arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  int flags = ptr->x_target_flags;
  const char *fpu_name;

  fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
              ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);

  fprintf (file, "%*sselected isa %s\n", indent, "",
           TARGET_THUMB2_P (flags) ? "thumb2" :
           TARGET_THUMB_P (flags) ? "thumb1" :
           "arm");

  if (ptr->x_arm_arch_string)
    fprintf (file, "%*sselected architecture %s\n", indent, "",
             ptr->x_arm_arch_string);

  if (ptr->x_arm_cpu_string)
    fprintf (file, "%*sselected CPU %s\n", indent, "",
             ptr->x_arm_cpu_string);

  if (ptr->x_arm_tune_string)
    fprintf (file, "%*sselected tune %s\n", indent, "",
             ptr->x_arm_tune_string);

  fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
}
30457 /* Hook to determine if one function can safely inline another. */
30460 arm_can_inline_p (tree caller
, tree callee
)
30462 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30463 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30464 bool can_inline
= true;
30466 struct cl_target_option
*caller_opts
30467 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30468 : target_option_default_node
);
30470 struct cl_target_option
*callee_opts
30471 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30472 : target_option_default_node
);
30474 if (callee_opts
== caller_opts
)
30477 /* Callee's ISA features should be a subset of the caller's. */
30478 struct arm_build_target caller_target
;
30479 struct arm_build_target callee_target
;
30480 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
30481 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
30483 arm_configure_build_target (&caller_target
, caller_opts
, &global_options_set
,
30485 arm_configure_build_target (&callee_target
, callee_opts
, &global_options_set
,
30487 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
30488 can_inline
= false;
30490 sbitmap_free (caller_target
.isa
);
30491 sbitmap_free (callee_target
.isa
);
30493 /* OK to inline between different modes.
30494 Function with mode specific instructions, e.g using asm,
30495 must be explicitly protected with noinline. */

/* Hook to fix function's alignment affected by target attribute.  */

static void
arm_relayout_function (tree fndecl)
{
  if (DECL_USER_ALIGN (fndecl))
    return;

  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  if (!callee_tree)
    callee_tree = target_option_default_node;

  struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
  SET_DECL_ALIGN (fndecl,
                  FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
}
30518 /* Inner function to process the attribute((target(...))), take an argument and
30519 set the current options from the argument. If we have a list, recursively
30520 go over the list. */
30523 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
30525 if (TREE_CODE (args
) == TREE_LIST
)
30529 for (; args
; args
= TREE_CHAIN (args
))
30530 if (TREE_VALUE (args
)
30531 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30536 else if (TREE_CODE (args
) != STRING_CST
)
30538 error ("attribute %<target%> argument not a string");
30542 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30545 while ((q
= strtok (argstr
, ",")) != NULL
)
30547 while (ISSPACE (*q
)) ++q
;
30550 if (!strncmp (q
, "thumb", 5))
30551 opts
->x_target_flags
|= MASK_THUMB
;
30553 else if (!strncmp (q
, "arm", 3))
30554 opts
->x_target_flags
&= ~MASK_THUMB
;
30556 else if (!strncmp (q
, "fpu=", 4))
30559 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
30560 &fpu_index
, CL_TARGET
))
30562 error ("invalid fpu for attribute(target(\"%s\"))", q
);
30565 if (fpu_index
== TARGET_FPU_auto
)
30567 /* This doesn't really make sense until we support
30568 general dynamic selection of the architecture and all
30570 sorry ("auto fpu selection not currently permitted here");
30573 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
30577 error ("attribute(target(\"%s\")) is unknown", q
);

/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
                                 struct gcc_options *opts_set)
{
  struct cl_target_option cl_opts;

  if (!arm_valid_target_attribute_rec (args, opts))
    return NULL_TREE;

  cl_target_option_save (&cl_opts, opts);
  arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
  arm_option_check_internal (opts);
  /* Do any overrides, such as global options arch=xxx.  */
  arm_option_override_internal (opts, opts_set);

  return build_target_option_node (opts);
}
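
/* Usage sketch (illustrative only, kept out of the build): an attribute
   string such as "fpu=neon" reaches this function through
   arm_valid_target_attribute_rec above; the accepted FPU names are the
   entries of the generated all_fpus table.  The function below is a
   hypothetical example of such a use.  */
#if 0
__attribute__ ((target ("fpu=neon")))
void
scale (float *p, int n, float k)
{
  int i;
  for (i = 0; i < n; i++)
    p[i] *= k;
}
#endif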

static void
add_attribute (const char * mode, tree *attributes)
{
  size_t len = strlen (mode);
  tree value = build_string (len, mode);

  TREE_TYPE (value) = build_array_type (char_type_node,
                                        build_index_type (size_int (len)));

  *attributes = tree_cons (get_identifier ("target"),
                           build_tree_list (NULL_TREE, value),
                           *attributes);
}
30619 /* For testing. Insert thumb or arm modes alternatively on functions. */
30622 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30626 if (! TARGET_FLIP_THUMB
)
30629 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30630 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30633 /* Nested definitions must inherit mode. */
30634 if (current_function_decl
)
30636 mode
= TARGET_THUMB
? "thumb" : "arm";
30637 add_attribute (mode
, attributes
);
30641 /* If there is already a setting don't change it. */
30642 if (lookup_attribute ("target", *attributes
) != NULL
)
30645 mode
= thumb_flipper
? "thumb" : "arm";
30646 add_attribute (mode
, attributes
);
30648 thumb_flipper
= !thumb_flipper
;
30651 /* Hook to validate attribute((target("string"))). */
30654 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30655 tree args
, int ARG_UNUSED (flags
))
30658 struct gcc_options func_options
;
30659 tree cur_tree
, new_optimize
;
30660 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
30662 /* Get the optimization options of the current function. */
30663 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
30665 /* If the function changed the optimization levels as well as setting target
30666 options, start with the optimizations specified. */
30667 if (!func_optimize
)
30668 func_optimize
= optimization_default_node
;
30670 /* Init func_options. */
30671 memset (&func_options
, 0, sizeof (func_options
));
30672 init_options_struct (&func_options
, NULL
);
30673 lang_hooks
.init_options_struct (&func_options
);
30675 /* Initialize func_options to the defaults. */
30676 cl_optimization_restore (&func_options
,
30677 TREE_OPTIMIZATION (func_optimize
));
30679 cl_target_option_restore (&func_options
,
30680 TREE_TARGET_OPTION (target_option_default_node
));
30682 /* Set func_options flags with new target mode. */
30683 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
30684 &global_options_set
);
30686 if (cur_tree
== NULL_TREE
)
30689 new_optimize
= build_optimization_node (&func_options
);
30691 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30693 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30695 finalize_options_struct (&func_options
);

/* Match an ISA feature bitmap to a named FPU.  We always use the
   first entry that exactly matches the feature set, so that we
   effectively canonicalize the FPU name for the assembler.  */
static const char *
arm_identify_fpu_from_isa (sbitmap isa)
{
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap cand_fpubits (isa_num_bits);

  bitmap_and (fpubits, isa, isa_all_fpubits);

  /* If there are no ISA feature bits relating to the FPU, we must be
     doing soft-float.  */
  if (bitmap_empty_p (fpubits))
    return "softvfp";

  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
      if (bitmap_equal_p (fpubits, cand_fpubits))
        return all_fpus[i].name;
    }

  /* We must find an entry, or things have gone wrong.  */
  gcc_unreachable ();
}

void
arm_declare_function_name (FILE *stream, const char *name, tree decl)
{
  fprintf (stream, "\t.syntax unified\n");

  if (TARGET_THUMB)
    {
      if (is_called_in_ARM_mode (decl)
          || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
              && cfun->is_thunk))
        fprintf (stream, "\t.code 32\n");
      else if (TARGET_THUMB1)
        fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
      else
        fprintf (stream, "\t.thumb\n\t.thumb_func\n");
    }
  else
    fprintf (stream, "\t.arm\n");

  asm_fprintf (asm_out_file, "\t.fpu %s\n",
               (TARGET_SOFT_FLOAT
                ? "softvfp"
                : arm_identify_fpu_from_isa (arm_active_target.isa)));

  if (TARGET_POKE_FUNCTION_NAME)
    arm_poke_function_name (stream, (const char *) name);
}

/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  IS_LOAD is set
   to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
   otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support to fuse ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other instruction,
   FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kind
   instruction fusion can be supported by returning different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
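
/* Worked example (illustrative sketch only, kept out of the build): a load
   from [r1, #8] receives fusion_pri = tmp - 1 while a store gets tmp - 2,
   so loads are ordered first; both then subtract (regno & 0xff) << 20 and
   the 20-bit offset from PRI, so accesses off the same base register sort
   together and by increasing offset.  The hypothetical model below mirrors
   that computation.  */
#if 0
static void
model_fusion_priority (int max_pri, int is_load, unsigned int base_regno,
                       int off_val, int *fusion_pri, int *pri)
{
  int tmp = max_pri - 1;

  *fusion_pri = is_load ? tmp - 1 : tmp - 2;    /* Loads go first.  */
  tmp /= 2;
  tmp -= ((base_regno & 0xff) << 20);           /* Smaller base goes first.  */
  if (off_val >= 0)                             /* Smaller offset goes first.  */
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);
  *pri = tmp;
}
#endif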

/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

GCC             0   1   2   3           3   2   1   0
              | x | x | x | x |       | x | x | x | x |
Architecture    3   2   1   0           3   2   1   0

Low Mask:         { 2, 3 }                { 0, 1 }
High Mask:        { 0, 1 }                { 2, 3 }
*/

rtx
arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
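
/* Worked example (illustrative sketch only, kept out of the build): for
   V4SImode, nunits == 4 and high_base == 2, so the PARALLEL built above
   holds { 2, 3 } for the architectural high half on little-endian and
   { 0, 1 } on big-endian, matching the mask table in the comment before
   the function.  The hypothetical model below computes the same lanes.  */
#if 0
static void
model_par_cnst_half (int nunits, int big_endian, int high, int *lanes)
{
  int high_base = nunits / 2, low_base = 0;
  int base = big_endian ? (high ? low_base : high_base)
                        : (high ? high_base : low_base);
  int i;

  for (i = 0; i < nunits / 2; i++)
    lanes[i] = base + i;
}
#endif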

/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   arm_simd_vect_par_cnst_half_p for more details.  */

bool
arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
                                     bool high)
{
  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
          || INTVAL (elt_ideal) != INTVAL (elt_op))
        return false;
    }
  return true;
}

/* Can output mi_thunk for all cases except for non-zero vcall_offset
   in Thumb1.  */
static bool
arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
                         const_tree)
{
  /* For now, we punt and not handle this for TARGET_THUMB1.  */
  if (vcall_offset && TARGET_THUMB1)
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Generate RTL for a conditional branch with rtx comparison CODE in
   mode CC_MODE.  The destination of the unlikely conditional branch
   is LABEL_REF.  */

void
arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
                          rtx label_ref)
{
  rtx x;
  x = gen_rtx_fmt_ee (code, VOIDmode,
                      gen_rtx_REG (cc_mode, CC_REGNUM),
                      const0_rtx);

  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (VOIDmode, label_ref),
                            pc_rtx);
  emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}
30985 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30987 For pure-code sections there is no letter code for this attribute, so
30988 output all the section flags numerically when this is needed. */
30991 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
30994 if (flags
& SECTION_ARM_PURECODE
)
30998 if (!(flags
& SECTION_DEBUG
))
31000 if (flags
& SECTION_EXCLUDE
)
31001 *num
|= 0x80000000;
31002 if (flags
& SECTION_WRITE
)
31004 if (flags
& SECTION_CODE
)
31006 if (flags
& SECTION_MERGE
)
31008 if (flags
& SECTION_STRINGS
)
31010 if (flags
& SECTION_TLS
)
31012 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
31021 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31023 If pure-code is passed as an option, make sure all functions are in
31024 sections that have the SHF_ARM_PURECODE attribute. */
31027 arm_function_section (tree decl
, enum node_frequency freq
,
31028 bool startup
, bool exit
)
31030 const char * section_name
;
31033 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
31034 return default_function_section (decl
, freq
, startup
, exit
);
31036 if (!target_pure_code
)
31037 return default_function_section (decl
, freq
, startup
, exit
);
31040 section_name
= DECL_SECTION_NAME (decl
);
31042 /* If a function is not in a named section then it falls under the 'default'
31043 text section, also known as '.text'. We can preserve previous behavior as
31044 the default text section already has the SHF_ARM_PURECODE section
31048 section
*default_sec
= default_function_section (decl
, freq
, startup
,
31051 /* If default_sec is not null, then it must be a special section like for
31052 example .text.startup. We set the pure-code attribute and return the
31053 same section to preserve existing behavior. */
31055 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
31056 return default_sec
;
31059 /* Otherwise look whether a section has already been created with
31061 sec
= get_named_section (decl
, section_name
, 0);
31063 /* If that is not the case passing NULL as the section's name to
31064 'get_named_section' will create a section with the declaration's
31066 sec
= get_named_section (decl
, NULL
, 0);
31068 /* Set the SHF_ARM_PURECODE attribute. */
31069 sec
->common
.flags
|= SECTION_ARM_PURECODE
;

/* Implements the TARGET_SECTION_FLAGS hook.

   If DECL is a function declaration and pure-code is passed as an option
   then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
   section's name and RELOC indicates whether the declarations initializer may
   contain runtime relocations.  */

static unsigned int
arm_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
    flags |= SECTION_ARM_PURECODE;

  return flags;
}

/* Generate call to __aeabi_[mode]divmod (op0, op1).  */

static void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
                           rtx op0, rtx op1,
                           rtx *quot_p, rtx *rem_p)
{
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
                                        libval_mode,
                                        op0, GET_MODE (op0),
                                        op1, GET_MODE (op1));

  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
                                       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
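
/* Illustrative sketch (kept out of the build): the __aeabi_*divmod helpers
   return quotient and remainder packed into a value twice the operand
   width, which is why the result is split with simplify_gen_subreg at byte
   offsets 0 and GET_MODE_SIZE (mode) above.  A hypothetical host-side model
   of that packing for SImode on a little-endian target: */
#if 0
#include <stdint.h>

static uint64_t
model_aeabi_idivmod (int32_t num, int32_t den)
{
  uint32_t quot = (uint32_t) (num / den);
  uint32_t rem = (uint32_t) (num % den);

  /* Quotient in the low word, remainder in the high word.  */
  return ((uint64_t) rem << 32) | quot;
}
#endif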
31121 /* This function checks for the availability of the coprocessor builtin passed
31122 in BUILTIN for the current target. Returns true if it is available and
31123 false otherwise. If a BUILTIN is passed for which this function has not
31124 been implemented it will cause an exception. */
31127 arm_coproc_builtin_available (enum unspecv builtin
)
31129 /* None of these builtins are available in Thumb mode if the target only
31130 supports Thumb-1. */
31148 case VUNSPEC_LDC2L
:
31150 case VUNSPEC_STC2L
:
31153 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31160 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31162 if (arm_arch6
|| arm_arch5te
)
31165 case VUNSPEC_MCRR2
:
31166 case VUNSPEC_MRRC2
:
31171 gcc_unreachable ();
31176 /* This function returns true if OP is a valid memory operand for the ldc and
31177 stc coprocessor instructions and false otherwise. */
31180 arm_coproc_ldc_stc_legitimate_address (rtx op
)
31182 HOST_WIDE_INT range
;
31183 /* Has to be a memory operand. */
31189 /* We accept registers. */
31193 switch GET_CODE (op
)
31197 /* Or registers with an offset. */
31198 if (!REG_P (XEXP (op
, 0)))
31203 /* The offset must be an immediate though. */
31204 if (!CONST_INT_P (op
))
31207 range
= INTVAL (op
);
31209 /* Within the range of [-1020,1020]. */
31210 if (!IN_RANGE (range
, -1020, 1020))
31213 /* And a multiple of 4. */
31214 return (range
% 4) == 0;
31220 return REG_P (XEXP (op
, 0));
31222 gcc_unreachable ();
31228 namespace selftest
{
31230 /* Scan the static data tables generated by parsecpu.awk looking for
31231 potential issues with the data. We primarily check for
31232 inconsistencies in the option extensions at present (extensions
31233 that duplicate others but aren't marked as aliases). Furthermore,
31234 for correct canonicalization later options must never be a subset
31235 of an earlier option. Any extension should also only specify other
31236 feature bits and never an architecture bit. The architecture is inferred
31237 from the declaration of the extension. */
31239 arm_test_cpu_arch_data (void)
31241 const arch_option
*arch
;
31242 const cpu_option
*cpu
;
31243 auto_sbitmap
target_isa (isa_num_bits
);
31244 auto_sbitmap
isa1 (isa_num_bits
);
31245 auto_sbitmap
isa2 (isa_num_bits
);
31247 for (arch
= all_architectures
; arch
->common
.name
!= NULL
; ++arch
)
31249 const cpu_arch_extension
*ext1
, *ext2
;
31251 if (arch
->common
.extensions
== NULL
)
31254 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
31256 for (ext1
= arch
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
31261 arm_initialize_isa (isa1
, ext1
->isa_bits
);
31262 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
31264 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
31267 arm_initialize_isa (isa2
, ext2
->isa_bits
);
31268 /* If the option is a subset of the parent option, it doesn't
31269 add anything and so isn't useful. */
31270 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
31272 /* If the extension specifies any architectural bits then
31273 disallow it. Extensions should only specify feature bits. */
31274 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
31279 for (cpu
= all_cores
; cpu
->common
.name
!= NULL
; ++cpu
)
31281 const cpu_arch_extension
*ext1
, *ext2
;
31283 if (cpu
->common
.extensions
== NULL
)
31286 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
31288 for (ext1
= cpu
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
31293 arm_initialize_isa (isa1
, ext1
->isa_bits
);
31294 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
31296 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
31299 arm_initialize_isa (isa2
, ext2
->isa_bits
);
31300 /* If the option is a subset of the parent option, it doesn't
31301 add anything and so isn't useful. */
31302 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
31304 /* If the extension specifies any architectural bits then
31305 disallow it. Extensions should only specify feature bits. */
31306 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
31313 arm_run_selftests (void)
31315 arm_test_cpu_arch_data ();
31317 } /* Namespace selftest. */
31319 #undef TARGET_RUN_TARGET_SELFTESTS
31320 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31321 #endif /* CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"