1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
34 #include "stringpool.h"
40 #include "diagnostic-core.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
61 #include "target-globals.h"
63 #include "tm-constrs.h"
65 #include "optabs-libfuncs.h"
68 /* This file should be included last. */
69 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook set by the language front end to emit language-specific object
   attributes (e.g. for AAPCS build-attribute output); may be NULL.  */
void (*arm_lang_output_object_attributes_hook)(void);
82 /* Forward function declarations. */
83 static bool arm_const_not_ok_for_debug_p (rtx
);
84 static bool arm_needs_doubleword_align (machine_mode
, const_tree
);
85 static int arm_compute_static_chain_stack_bytes (void);
86 static arm_stack_offsets
*arm_get_frame_offsets (void);
87 static void arm_add_gc_roots (void);
88 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
89 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
90 static unsigned bit_count (unsigned long);
91 static unsigned bitmap_popcount (const sbitmap
);
92 static int arm_address_register_rtx_p (rtx
, int);
93 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
94 static bool is_called_in_ARM_mode (tree
);
95 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
96 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
97 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
98 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
99 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
100 inline static int thumb1_index_register_rtx_p (rtx
, int);
101 static int thumb_far_jump_used_p (void);
102 static bool thumb_force_lr_save (void);
103 static unsigned arm_size_return_regs (void);
104 static bool arm_assemble_integer (rtx
, unsigned int, int);
105 static void arm_print_operand (FILE *, rtx
, int);
106 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
107 static bool arm_print_operand_punct_valid_p (unsigned char code
);
108 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
109 static arm_cc
get_arm_condition_code (rtx
);
110 static const char *output_multi_immediate (rtx
*, const char *, const char *,
112 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
113 static struct machine_function
*arm_init_machine_status (void);
114 static void thumb_exit (FILE *, int);
115 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
116 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
117 static Mnode
*add_minipool_forward_ref (Mfix
*);
118 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
119 static Mnode
*add_minipool_backward_ref (Mfix
*);
120 static void assign_minipool_offsets (Mfix
*);
121 static void arm_print_value (FILE *, rtx
);
122 static void dump_minipool (rtx_insn
*);
123 static int arm_barrier_cost (rtx_insn
*);
124 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
125 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
126 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
128 static void arm_reorg (void);
129 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
130 static unsigned long arm_compute_save_reg0_reg12_mask (void);
131 static unsigned long arm_compute_save_reg_mask (void);
132 static unsigned long arm_isr_value (tree
);
133 static unsigned long arm_compute_func_type (void);
134 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
135 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
136 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
137 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
138 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
140 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
141 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
142 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
143 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
144 static int arm_comp_type_attributes (const_tree
, const_tree
);
145 static void arm_set_default_type_attributes (tree
);
146 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
147 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
148 static int optimal_immediate_sequence (enum rtx_code code
,
149 unsigned HOST_WIDE_INT val
,
150 struct four_ints
*return_sequence
);
151 static int optimal_immediate_sequence_1 (enum rtx_code code
,
152 unsigned HOST_WIDE_INT val
,
153 struct four_ints
*return_sequence
,
155 static int arm_get_strip_length (int);
156 static bool arm_function_ok_for_sibcall (tree
, tree
);
157 static machine_mode
arm_promote_function_mode (const_tree
,
160 static bool arm_return_in_memory (const_tree
, const_tree
);
161 static rtx
arm_function_value (const_tree
, const_tree
, bool);
162 static rtx
arm_libcall_value_1 (machine_mode
);
163 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
164 static bool arm_function_value_regno_p (const unsigned int);
165 static void arm_internal_label (FILE *, const char *, unsigned long);
166 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
168 static bool arm_have_conditional_execution (void);
169 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
170 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
171 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
172 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
173 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
174 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
175 static void emit_constant_insn (rtx cond
, rtx pattern
);
176 static rtx_insn
*emit_set_insn (rtx
, rtx
);
177 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
178 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
180 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
182 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
184 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
185 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
187 static rtx
aapcs_libcall_value (machine_mode
);
188 static int aapcs_select_return_coproc (const_tree
, const_tree
);
190 #ifdef OBJECT_FORMAT_ELF
191 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
192 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
195 static void arm_encode_section_info (tree
, rtx
, int);
198 static void arm_file_end (void);
199 static void arm_file_start (void);
200 static void arm_insert_attributes (tree
, tree
*);
202 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
204 static bool arm_pass_by_reference (cumulative_args_t
,
205 machine_mode
, const_tree
, bool);
206 static bool arm_promote_prototypes (const_tree
);
207 static bool arm_default_short_enums (void);
208 static bool arm_align_anon_bitfield (void);
209 static bool arm_return_in_msb (const_tree
);
210 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
211 static bool arm_return_in_memory (const_tree
, const_tree
);
/* EH-unwinding helpers are only declared when ARM unwind info is in use;
   the surrounding conditional was missing and has been restored.  */
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
218 static rtx
arm_dwarf_register_span (rtx
);
220 static tree
arm_cxx_guard_type (void);
221 static bool arm_cxx_guard_mask_bit (void);
222 static tree
arm_get_cookie_size (tree
);
223 static bool arm_cookie_has_size (void);
224 static bool arm_cxx_cdtor_returns_this (void);
225 static bool arm_cxx_key_method_may_be_inline (void);
226 static void arm_cxx_determine_class_data_visibility (tree
);
227 static bool arm_cxx_class_data_always_comdat (void);
228 static bool arm_cxx_use_aeabi_atexit (void);
229 static void arm_init_libfuncs (void);
230 static tree
arm_build_builtin_va_list (void);
231 static void arm_expand_builtin_va_start (tree
, rtx
);
232 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
233 static void arm_option_override (void);
234 static void arm_option_restore (struct gcc_options
*,
235 struct cl_target_option
*);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option
*);
238 static void arm_set_current_function (tree
);
239 static bool arm_can_inline_p (tree
, tree
);
240 static void arm_relayout_function (tree
);
241 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
242 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
243 static bool arm_sched_can_speculate_insn (rtx_insn
*);
244 static bool arm_macro_fusion_p (void);
245 static bool arm_cannot_copy_insn_p (rtx_insn
*);
246 static int arm_issue_rate (void);
247 static int arm_first_cycle_multipass_dfa_lookahead (void);
248 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
249 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
250 static bool arm_output_addr_const_extra (FILE *, rtx
);
251 static bool arm_allocate_stack_slots_for_args (void);
252 static bool arm_warn_func_return (tree
);
253 static tree
arm_promoted_type (const_tree t
);
254 static bool arm_scalar_mode_supported_p (machine_mode
);
255 static bool arm_frame_pointer_required (void);
256 static bool arm_can_eliminate (const int, const int);
257 static void arm_asm_trampoline_template (FILE *);
258 static void arm_trampoline_init (rtx
, tree
, rtx
);
259 static rtx
arm_trampoline_adjust_address (rtx
);
260 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
261 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
262 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
263 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
264 static bool arm_array_mode_supported_p (machine_mode
,
265 unsigned HOST_WIDE_INT
);
266 static machine_mode
arm_preferred_simd_mode (machine_mode
);
267 static bool arm_class_likely_spilled_p (reg_class_t
);
268 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
269 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
270 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
274 static void arm_conditional_register_usage (void);
275 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
276 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
277 static unsigned int arm_autovectorize_vector_sizes (void);
278 static int arm_default_branch_cost (bool, bool);
279 static int arm_cortex_a5_branch_cost (bool, bool);
280 static int arm_cortex_m_branch_cost (bool, bool);
281 static int arm_cortex_m7_branch_cost (bool, bool);
283 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
284 const unsigned char *sel
);
286 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
288 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
290 int misalign ATTRIBUTE_UNUSED
);
291 static unsigned arm_add_stmt_cost (void *data
, int count
,
292 enum vect_cost_for_stmt kind
,
293 struct _stmt_vec_info
*stmt_info
,
295 enum vect_cost_model_location where
);
297 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
298 bool op0_preserve_value
);
299 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
301 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
302 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
304 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
305 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
306 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
308 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
309 static machine_mode
arm_floatn_mode (int, bool);
311 /* Table of machine attributes. */
312 static const struct attribute_spec arm_attribute_table
[] =
314 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
315 affects_type_identity } */
316 /* Function calls made to this symbol must be done indirectly, because
317 it may lie outside of the 26 bit addressing range of a normal function
319 { "long_call", 0, 0, false, true, true, NULL
, false },
320 /* Whereas these functions are always known to reside within the 26 bit
322 { "short_call", 0, 0, false, true, true, NULL
, false },
323 /* Specify the procedure call conventions for a function. */
324 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
326 /* Interrupt Service Routines have special prologue and epilogue requirements. */
327 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
329 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
331 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
334 /* ARM/PE has three new attributes:
336 dllexport - for exporting a function/variable that will live in a dll
337 dllimport - for importing a function/variable from a dll
339 Microsoft allows multiple declspecs in one __declspec, separating
340 them with spaces. We do NOT support this. Instead, use __declspec
343 { "dllimport", 0, 0, true, false, false, NULL
, false },
344 { "dllexport", 0, 0, true, false, false, NULL
, false },
345 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
347 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
348 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
349 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
350 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
353 /* ARMv8-M Security Extensions support. */
354 { "cmse_nonsecure_entry", 0, 0, true, false, false,
355 arm_handle_cmse_nonsecure_entry
, false },
356 { "cmse_nonsecure_call", 0, 0, true, false, false,
357 arm_handle_cmse_nonsecure_call
, true },
358 { NULL
, 0, 0, false, false, false, NULL
, false }
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
367 #undef TARGET_LEGITIMIZE_ADDRESS
368 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
373 #undef TARGET_INSERT_ATTRIBUTES
374 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
376 #undef TARGET_ASM_FILE_START
377 #define TARGET_ASM_FILE_START arm_file_start
378 #undef TARGET_ASM_FILE_END
379 #define TARGET_ASM_FILE_END arm_file_end
381 #undef TARGET_ASM_ALIGNED_SI_OP
382 #define TARGET_ASM_ALIGNED_SI_OP NULL
383 #undef TARGET_ASM_INTEGER
384 #define TARGET_ASM_INTEGER arm_assemble_integer
386 #undef TARGET_PRINT_OPERAND
387 #define TARGET_PRINT_OPERAND arm_print_operand
388 #undef TARGET_PRINT_OPERAND_ADDRESS
389 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
390 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
391 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
393 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
394 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
396 #undef TARGET_ASM_FUNCTION_PROLOGUE
397 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
399 #undef TARGET_ASM_FUNCTION_EPILOGUE
400 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
402 #undef TARGET_CAN_INLINE_P
403 #define TARGET_CAN_INLINE_P arm_can_inline_p
405 #undef TARGET_RELAYOUT_FUNCTION
406 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
408 #undef TARGET_OPTION_OVERRIDE
409 #define TARGET_OPTION_OVERRIDE arm_option_override
411 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
412 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
414 #undef TARGET_OPTION_RESTORE
415 #define TARGET_OPTION_RESTORE arm_option_restore
417 #undef TARGET_OPTION_PRINT
418 #define TARGET_OPTION_PRINT arm_option_print
420 #undef TARGET_COMP_TYPE_ATTRIBUTES
421 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
423 #undef TARGET_SCHED_CAN_SPECULATE_INSN
424 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
426 #undef TARGET_SCHED_MACRO_FUSION_P
427 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
429 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
430 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
432 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
433 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
435 #undef TARGET_SCHED_ADJUST_COST
436 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
438 #undef TARGET_SET_CURRENT_FUNCTION
439 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
441 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
442 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
444 #undef TARGET_SCHED_REORDER
445 #define TARGET_SCHED_REORDER arm_sched_reorder
447 #undef TARGET_REGISTER_MOVE_COST
448 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
450 #undef TARGET_MEMORY_MOVE_COST
451 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
/* Use the PE-specific section-info hook when building for ARM/PE; the
   #ifdef/#else/#endif lines were lost in transcription (the two
   conflicting #defines confirm the conditional) and have been restored.  */
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif
460 #undef TARGET_STRIP_NAME_ENCODING
461 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
463 #undef TARGET_ASM_INTERNAL_LABEL
464 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
466 #undef TARGET_FLOATN_MODE
467 #define TARGET_FLOATN_MODE arm_floatn_mode
469 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
470 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
472 #undef TARGET_FUNCTION_VALUE
473 #define TARGET_FUNCTION_VALUE arm_function_value
475 #undef TARGET_LIBCALL_VALUE
476 #define TARGET_LIBCALL_VALUE arm_libcall_value
478 #undef TARGET_FUNCTION_VALUE_REGNO_P
479 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
481 #undef TARGET_ASM_OUTPUT_MI_THUNK
482 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
483 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
484 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
486 #undef TARGET_RTX_COSTS
487 #define TARGET_RTX_COSTS arm_rtx_costs
488 #undef TARGET_ADDRESS_COST
489 #define TARGET_ADDRESS_COST arm_address_cost
491 #undef TARGET_SHIFT_TRUNCATION_MASK
492 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
493 #undef TARGET_VECTOR_MODE_SUPPORTED_P
494 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
495 #undef TARGET_ARRAY_MODE_SUPPORTED_P
496 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
497 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
498 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
499 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
500 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
501 arm_autovectorize_vector_sizes
503 #undef TARGET_MACHINE_DEPENDENT_REORG
504 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
506 #undef TARGET_INIT_BUILTINS
507 #define TARGET_INIT_BUILTINS arm_init_builtins
508 #undef TARGET_EXPAND_BUILTIN
509 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
510 #undef TARGET_BUILTIN_DECL
511 #define TARGET_BUILTIN_DECL arm_builtin_decl
513 #undef TARGET_INIT_LIBFUNCS
514 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
516 #undef TARGET_PROMOTE_FUNCTION_MODE
517 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
518 #undef TARGET_PROMOTE_PROTOTYPES
519 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
520 #undef TARGET_PASS_BY_REFERENCE
521 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
522 #undef TARGET_ARG_PARTIAL_BYTES
523 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
524 #undef TARGET_FUNCTION_ARG
525 #define TARGET_FUNCTION_ARG arm_function_arg
526 #undef TARGET_FUNCTION_ARG_ADVANCE
527 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
528 #undef TARGET_FUNCTION_ARG_BOUNDARY
529 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
531 #undef TARGET_SETUP_INCOMING_VARARGS
532 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
534 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
535 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
537 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
538 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
539 #undef TARGET_TRAMPOLINE_INIT
540 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
541 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
542 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
544 #undef TARGET_WARN_FUNC_RETURN
545 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
547 #undef TARGET_DEFAULT_SHORT_ENUMS
548 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
550 #undef TARGET_ALIGN_ANON_BITFIELD
551 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
553 #undef TARGET_NARROW_VOLATILE_BITFIELD
554 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
556 #undef TARGET_CXX_GUARD_TYPE
557 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
559 #undef TARGET_CXX_GUARD_MASK_BIT
560 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
562 #undef TARGET_CXX_GET_COOKIE_SIZE
563 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
565 #undef TARGET_CXX_COOKIE_HAS_SIZE
566 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
568 #undef TARGET_CXX_CDTOR_RETURNS_THIS
569 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
571 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
572 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
574 #undef TARGET_CXX_USE_AEABI_ATEXIT
575 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
577 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
578 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
579 arm_cxx_determine_class_data_visibility
581 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
582 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
584 #undef TARGET_RETURN_IN_MSB
585 #define TARGET_RETURN_IN_MSB arm_return_in_msb
587 #undef TARGET_RETURN_IN_MEMORY
588 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
590 #undef TARGET_MUST_PASS_IN_STACK
591 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
/* EABI unwinding hooks — only when ARM unwind info is in use.  The opening
   `#if ARM_UNWIND_INFO` matching the visible closing #endif was lost in
   transcription and has been restored.  */
#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */
609 #undef TARGET_ASM_INIT_SECTIONS
610 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
612 #undef TARGET_DWARF_REGISTER_SPAN
613 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
615 #undef TARGET_CANNOT_COPY_INSN_P
616 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
/* TLS support depends on assembler capability; the HAVE_AS_TLS guard was
   lost in transcription and has been restored.  */
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
623 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
624 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
626 #undef TARGET_LEGITIMATE_CONSTANT_P
627 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
629 #undef TARGET_CANNOT_FORCE_CONST_MEM
630 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
632 #undef TARGET_MAX_ANCHOR_OFFSET
633 #define TARGET_MAX_ANCHOR_OFFSET 4095
635 /* The minimum is set such that the total size of the block
636 for a particular anchor is -4088 + 1 + 4095 bytes, which is
637 divisible by eight, ensuring natural spacing of anchors. */
638 #undef TARGET_MIN_ANCHOR_OFFSET
639 #define TARGET_MIN_ANCHOR_OFFSET -4088
641 #undef TARGET_SCHED_ISSUE_RATE
642 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
644 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
645 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
646 arm_first_cycle_multipass_dfa_lookahead
648 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
649 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
650 arm_first_cycle_multipass_dfa_lookahead_guard
652 #undef TARGET_MANGLE_TYPE
653 #define TARGET_MANGLE_TYPE arm_mangle_type
655 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
656 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
658 #undef TARGET_BUILD_BUILTIN_VA_LIST
659 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
660 #undef TARGET_EXPAND_BUILTIN_VA_START
661 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
662 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
663 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
/* DTP-relative DWARF output requires TLS assembler support; the
   HAVE_AS_TLS guard was lost in transcription and has been restored.  */
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
670 #undef TARGET_LEGITIMATE_ADDRESS_P
671 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
673 #undef TARGET_PREFERRED_RELOAD_CLASS
674 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
676 #undef TARGET_PROMOTED_TYPE
677 #define TARGET_PROMOTED_TYPE arm_promoted_type
679 #undef TARGET_SCALAR_MODE_SUPPORTED_P
680 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
682 #undef TARGET_FRAME_POINTER_REQUIRED
683 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
685 #undef TARGET_CAN_ELIMINATE
686 #define TARGET_CAN_ELIMINATE arm_can_eliminate
688 #undef TARGET_CONDITIONAL_REGISTER_USAGE
689 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
691 #undef TARGET_CLASS_LIKELY_SPILLED_P
692 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
694 #undef TARGET_VECTORIZE_BUILTINS
695 #define TARGET_VECTORIZE_BUILTINS
697 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
698 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
699 arm_builtin_vectorized_function
701 #undef TARGET_VECTOR_ALIGNMENT
702 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
704 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
705 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
706 arm_vector_alignment_reachable
708 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
709 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
710 arm_builtin_support_vector_misalignment
712 #undef TARGET_PREFERRED_RENAME_CLASS
713 #define TARGET_PREFERRED_RENAME_CLASS \
714 arm_preferred_rename_class
716 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
717 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
718 arm_vectorize_vec_perm_const_ok
720 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
721 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
722 arm_builtin_vectorization_cost
723 #undef TARGET_VECTORIZE_ADD_STMT_COST
724 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
726 #undef TARGET_CANONICALIZE_COMPARISON
727 #define TARGET_CANONICALIZE_COMPARISON \
728 arm_canonicalize_comparison
730 #undef TARGET_ASAN_SHADOW_OFFSET
731 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
733 #undef MAX_INSN_PER_IT_BLOCK
734 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
736 #undef TARGET_CAN_USE_DOLOOP_P
737 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
739 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
740 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
742 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
743 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
745 #undef TARGET_SCHED_FUSION_PRIORITY
746 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
748 #undef TARGET_ASM_FUNCTION_SECTION
749 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
751 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
752 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
754 #undef TARGET_SECTION_TYPE_FLAGS
755 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
757 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
758 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
760 #undef TARGET_C_EXCESS_PRECISION
761 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
763 /* Although the architecture reserves bits 0 and 1, only the former is
764 used for ARM/Thumb ISA selection in v7 and earlier versions. */
765 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
766 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
768 struct gcc_target targetm
= TARGET_INITIALIZER
;
770 /* Obstack for minipool constant handling. */
771 static struct obstack minipool_obstack
;
772 static char * minipool_startobj
;
/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;
783 /* The processor for which instructions should be scheduled. */
784 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
786 /* The current tuning set. */
787 const struct tune_params
*current_tune
;
789 /* Which floating point hardware to schedule for. */
792 /* Used for Thumb call_via trampolines. */
793 rtx thumb_call_via_label
[14];
794 static int thumb_call_reg_needed
;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;
800 /* The highest ARM architecture version supported by the
802 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
804 /* Active target architecture and tuning. */
806 struct arm_build_target arm_active_target
;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.

   NOTE(review): only the comments survived transcription; the variable
   definitions below are restored per the upstream source.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;
/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
/* NOTE(review): only the comments survived transcription; definitions
   restored per the upstream source.  */

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;
862 /* Nonzero if this chip supports the FP16 instructions extension of ARM
864 int arm_fp16_inst
= 0;
866 /* Nonzero if this chip can benefit from load scheduling. */
867 int arm_ld_sched
= 0;
869 /* Nonzero if this chip is a StrongARM. */
870 int arm_tune_strongarm
= 0;
872 /* Nonzero if this chip supports Intel Wireless MMX technology. */
873 int arm_arch_iwmmxt
= 0;
875 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
876 int arm_arch_iwmmxt2
= 0;
878 /* Nonzero if this chip is an XScale. */
879 int arm_arch_xscale
= 0;
881 /* Nonzero if tuning for XScale */
882 int arm_tune_xscale
= 0;
884 /* Nonzero if we want to tune for stores that access the write-buffer.
885 This typically means an ARM6 or ARM7 with MMU or MPU. */
886 int arm_tune_wbuf
= 0;
888 /* Nonzero if tuning for Cortex-A9. */
889 int arm_tune_cortex_a9
= 0;
891 /* Nonzero if we should define __THUMB_INTERWORK__ in the
893 XXX This is a bit of a hack, it's intended to help work around
894 problems in GLD which doesn't understand that armv5t code is
895 interworking clean. */
896 int arm_cpp_interwork
= 0;
898 /* Nonzero if chip supports Thumb 1. */
901 /* Nonzero if chip supports Thumb 2. */
904 /* Nonzero if chip supports integer division instruction. */
905 int arm_arch_arm_hwdiv
;
906 int arm_arch_thumb_hwdiv
;
908 /* Nonzero if chip disallows volatile memory access in IT block. */
909 int arm_arch_no_volatile_ce
;
911 /* Nonzero if we should use Neon to handle 64-bits operations rather
912 than core registers. */
913 int prefer_neon_for_64bits
= 0;
915 /* Nonzero if we shouldn't use literal pools. */
916 bool arm_disable_literal_pool
= false;
918 /* The register number to be used for the PIC offset register. */
919 unsigned arm_pic_register
= INVALID_REGNUM
;
921 enum arm_pcs arm_pcs_default
;
923 /* For an explanation of these variables, see final_prescan_insn below. */
925 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
926 enum arm_cond_code arm_current_cc
;
929 int arm_target_label
;
930 /* The number of conditionally executed insns, including the current insn. */
931 int arm_condexec_count
= 0;
932 /* A bitmask specifying the patterns for the IT block.
933 Zero means do not output an IT block before this insn. */
934 int arm_condexec_mask
= 0;
935 /* The number of bits used in arm_condexec_mask. */
936 int arm_condexec_masklen
= 0;
938 /* Nonzero if chip supports the ARMv8 CRC instructions. */
939 int arm_arch_crc
= 0;
941 /* Nonzero if chip supports the ARMv8-M security extensions. */
942 int arm_arch_cmse
= 0;
944 /* Nonzero if the core has a very small, high-latency, multiply unit. */
945 int arm_m_profile_small_mul
= 0;
/* The condition codes of the ARM, and the inverse function.
   Indexed by enum arm_cond_code; the lost opening/closing braces of the
   initializer (dropped by extraction) are restored here.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.
   Braces of the initializer restored (lost in extraction).  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/* Canonical spelling of the logical-shift-left mnemonic.  */
#define ARM_LSL_NAME "lsl"

/* True iff the two NUL-terminated strings are equal.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Mask of low registers usable as Thumb-2 work registers: r0-r7 minus the
   frame pointer, SP, PC and (if live) the PIC register.  */
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
			      | (1 << SP_REGNUM) | (1 << PC_REGNUM)	\
			      | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* NOTE(review): the lines below appear to be the field list of a processor
   description struct whose opening "struct ... {" line (and terminating
   "};") were lost in extraction — restore the enclosing definition from
   upstream before this will compile.  Code kept byte-identical.  */
967 /* Initialization code. */
971 const char *const name
;
972 enum processor_type core
;
973 unsigned int tune_flags
;
975 enum base_architecture base_arch
;
976 enum isa_feature isa_bits
[isa_num_bits
];
977 const struct tune_params
*const tune
;
/* NOTE(review): the continuation body of ARM_PREFETCH_BENEFICIAL (after the
   trailing backslash) was lost in extraction; restore it from upstream.
   The fields are presumably { num_slots, l1_size, l1_line_size } to match
   ARM_PREFETCH_NOT_BENEFICIAL — confirm against upstream.  */
981 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
982 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
989 /* arm generic vectorizer costs. */
991 struct cpu_vec_costs arm_default_vec_cost
= {
992 1, /* scalar_stmt_cost. */
993 1, /* scalar load_cost. */
994 1, /* scalar_store_cost. */
995 1, /* vec_stmt_cost. */
996 1, /* vec_to_scalar_cost. */
997 1, /* scalar_to_vec_cost. */
998 1, /* vec_align_load_cost. */
999 1, /* vec_unalign_load_cost. */
1000 1, /* vec_unalign_store_cost. */
1001 1, /* vec_store_cost. */
1002 3, /* cond_taken_branch_cost. */
1003 1, /* cond_not_taken_branch_cost. */
1006 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1007 #include "aarch-cost-tables.h"
1011 const struct cpu_cost_table cortexa9_extra_costs
=
1018 COSTS_N_INSNS (1), /* shift_reg. */
1019 COSTS_N_INSNS (1), /* arith_shift. */
1020 COSTS_N_INSNS (2), /* arith_shift_reg. */
1022 COSTS_N_INSNS (1), /* log_shift_reg. */
1023 COSTS_N_INSNS (1), /* extend. */
1024 COSTS_N_INSNS (2), /* extend_arith. */
1025 COSTS_N_INSNS (1), /* bfi. */
1026 COSTS_N_INSNS (1), /* bfx. */
1030 true /* non_exec_costs_exec. */
1035 COSTS_N_INSNS (3), /* simple. */
1036 COSTS_N_INSNS (3), /* flag_setting. */
1037 COSTS_N_INSNS (2), /* extend. */
1038 COSTS_N_INSNS (3), /* add. */
1039 COSTS_N_INSNS (2), /* extend_add. */
1040 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1044 0, /* simple (N/A). */
1045 0, /* flag_setting (N/A). */
1046 COSTS_N_INSNS (4), /* extend. */
1048 COSTS_N_INSNS (4), /* extend_add. */
1054 COSTS_N_INSNS (2), /* load. */
1055 COSTS_N_INSNS (2), /* load_sign_extend. */
1056 COSTS_N_INSNS (2), /* ldrd. */
1057 COSTS_N_INSNS (2), /* ldm_1st. */
1058 1, /* ldm_regs_per_insn_1st. */
1059 2, /* ldm_regs_per_insn_subsequent. */
1060 COSTS_N_INSNS (5), /* loadf. */
1061 COSTS_N_INSNS (5), /* loadd. */
1062 COSTS_N_INSNS (1), /* load_unaligned. */
1063 COSTS_N_INSNS (2), /* store. */
1064 COSTS_N_INSNS (2), /* strd. */
1065 COSTS_N_INSNS (2), /* stm_1st. */
1066 1, /* stm_regs_per_insn_1st. */
1067 2, /* stm_regs_per_insn_subsequent. */
1068 COSTS_N_INSNS (1), /* storef. */
1069 COSTS_N_INSNS (1), /* stored. */
1070 COSTS_N_INSNS (1), /* store_unaligned. */
1071 COSTS_N_INSNS (1), /* loadv. */
1072 COSTS_N_INSNS (1) /* storev. */
1077 COSTS_N_INSNS (14), /* div. */
1078 COSTS_N_INSNS (4), /* mult. */
1079 COSTS_N_INSNS (7), /* mult_addsub. */
1080 COSTS_N_INSNS (30), /* fma. */
1081 COSTS_N_INSNS (3), /* addsub. */
1082 COSTS_N_INSNS (1), /* fpconst. */
1083 COSTS_N_INSNS (1), /* neg. */
1084 COSTS_N_INSNS (3), /* compare. */
1085 COSTS_N_INSNS (3), /* widen. */
1086 COSTS_N_INSNS (3), /* narrow. */
1087 COSTS_N_INSNS (3), /* toint. */
1088 COSTS_N_INSNS (3), /* fromint. */
1089 COSTS_N_INSNS (3) /* roundint. */
1093 COSTS_N_INSNS (24), /* div. */
1094 COSTS_N_INSNS (5), /* mult. */
1095 COSTS_N_INSNS (8), /* mult_addsub. */
1096 COSTS_N_INSNS (30), /* fma. */
1097 COSTS_N_INSNS (3), /* addsub. */
1098 COSTS_N_INSNS (1), /* fpconst. */
1099 COSTS_N_INSNS (1), /* neg. */
1100 COSTS_N_INSNS (3), /* compare. */
1101 COSTS_N_INSNS (3), /* widen. */
1102 COSTS_N_INSNS (3), /* narrow. */
1103 COSTS_N_INSNS (3), /* toint. */
1104 COSTS_N_INSNS (3), /* fromint. */
1105 COSTS_N_INSNS (3) /* roundint. */
1110 COSTS_N_INSNS (1) /* alu. */
1114 const struct cpu_cost_table cortexa8_extra_costs
=
1120 COSTS_N_INSNS (1), /* shift. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 0, /* arith_shift_reg. */
1124 COSTS_N_INSNS (1), /* log_shift. */
1125 0, /* log_shift_reg. */
1127 0, /* extend_arith. */
1133 true /* non_exec_costs_exec. */
1138 COSTS_N_INSNS (1), /* simple. */
1139 COSTS_N_INSNS (1), /* flag_setting. */
1140 COSTS_N_INSNS (1), /* extend. */
1141 COSTS_N_INSNS (1), /* add. */
1142 COSTS_N_INSNS (1), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (2), /* extend. */
1151 COSTS_N_INSNS (2), /* extend_add. */
1157 COSTS_N_INSNS (1), /* load. */
1158 COSTS_N_INSNS (1), /* load_sign_extend. */
1159 COSTS_N_INSNS (1), /* ldrd. */
1160 COSTS_N_INSNS (1), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (1), /* loadf. */
1164 COSTS_N_INSNS (1), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (1), /* store. */
1167 COSTS_N_INSNS (1), /* strd. */
1168 COSTS_N_INSNS (1), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1180 COSTS_N_INSNS (36), /* div. */
1181 COSTS_N_INSNS (11), /* mult. */
1182 COSTS_N_INSNS (20), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (9), /* addsub. */
1185 COSTS_N_INSNS (3), /* fpconst. */
1186 COSTS_N_INSNS (3), /* neg. */
1187 COSTS_N_INSNS (6), /* compare. */
1188 COSTS_N_INSNS (4), /* widen. */
1189 COSTS_N_INSNS (4), /* narrow. */
1190 COSTS_N_INSNS (8), /* toint. */
1191 COSTS_N_INSNS (8), /* fromint. */
1192 COSTS_N_INSNS (8) /* roundint. */
1196 COSTS_N_INSNS (64), /* div. */
1197 COSTS_N_INSNS (16), /* mult. */
1198 COSTS_N_INSNS (25), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (9), /* addsub. */
1201 COSTS_N_INSNS (3), /* fpconst. */
1202 COSTS_N_INSNS (3), /* neg. */
1203 COSTS_N_INSNS (6), /* compare. */
1204 COSTS_N_INSNS (6), /* widen. */
1205 COSTS_N_INSNS (6), /* narrow. */
1206 COSTS_N_INSNS (8), /* toint. */
1207 COSTS_N_INSNS (8), /* fromint. */
1208 COSTS_N_INSNS (8) /* roundint. */
1213 COSTS_N_INSNS (1) /* alu. */
1217 const struct cpu_cost_table cortexa5_extra_costs
=
1223 COSTS_N_INSNS (1), /* shift. */
1224 COSTS_N_INSNS (1), /* shift_reg. */
1225 COSTS_N_INSNS (1), /* arith_shift. */
1226 COSTS_N_INSNS (1), /* arith_shift_reg. */
1227 COSTS_N_INSNS (1), /* log_shift. */
1228 COSTS_N_INSNS (1), /* log_shift_reg. */
1229 COSTS_N_INSNS (1), /* extend. */
1230 COSTS_N_INSNS (1), /* extend_arith. */
1231 COSTS_N_INSNS (1), /* bfi. */
1232 COSTS_N_INSNS (1), /* bfx. */
1233 COSTS_N_INSNS (1), /* clz. */
1234 COSTS_N_INSNS (1), /* rev. */
1236 true /* non_exec_costs_exec. */
1243 COSTS_N_INSNS (1), /* flag_setting. */
1244 COSTS_N_INSNS (1), /* extend. */
1245 COSTS_N_INSNS (1), /* add. */
1246 COSTS_N_INSNS (1), /* extend_add. */
1247 COSTS_N_INSNS (7) /* idiv. */
1251 0, /* simple (N/A). */
1252 0, /* flag_setting (N/A). */
1253 COSTS_N_INSNS (1), /* extend. */
1255 COSTS_N_INSNS (2), /* extend_add. */
1261 COSTS_N_INSNS (1), /* load. */
1262 COSTS_N_INSNS (1), /* load_sign_extend. */
1263 COSTS_N_INSNS (6), /* ldrd. */
1264 COSTS_N_INSNS (1), /* ldm_1st. */
1265 1, /* ldm_regs_per_insn_1st. */
1266 2, /* ldm_regs_per_insn_subsequent. */
1267 COSTS_N_INSNS (2), /* loadf. */
1268 COSTS_N_INSNS (4), /* loadd. */
1269 COSTS_N_INSNS (1), /* load_unaligned. */
1270 COSTS_N_INSNS (1), /* store. */
1271 COSTS_N_INSNS (3), /* strd. */
1272 COSTS_N_INSNS (1), /* stm_1st. */
1273 1, /* stm_regs_per_insn_1st. */
1274 2, /* stm_regs_per_insn_subsequent. */
1275 COSTS_N_INSNS (2), /* storef. */
1276 COSTS_N_INSNS (2), /* stored. */
1277 COSTS_N_INSNS (1), /* store_unaligned. */
1278 COSTS_N_INSNS (1), /* loadv. */
1279 COSTS_N_INSNS (1) /* storev. */
1284 COSTS_N_INSNS (15), /* div. */
1285 COSTS_N_INSNS (3), /* mult. */
1286 COSTS_N_INSNS (7), /* mult_addsub. */
1287 COSTS_N_INSNS (7), /* fma. */
1288 COSTS_N_INSNS (3), /* addsub. */
1289 COSTS_N_INSNS (3), /* fpconst. */
1290 COSTS_N_INSNS (3), /* neg. */
1291 COSTS_N_INSNS (3), /* compare. */
1292 COSTS_N_INSNS (3), /* widen. */
1293 COSTS_N_INSNS (3), /* narrow. */
1294 COSTS_N_INSNS (3), /* toint. */
1295 COSTS_N_INSNS (3), /* fromint. */
1296 COSTS_N_INSNS (3) /* roundint. */
1300 COSTS_N_INSNS (30), /* div. */
1301 COSTS_N_INSNS (6), /* mult. */
1302 COSTS_N_INSNS (10), /* mult_addsub. */
1303 COSTS_N_INSNS (7), /* fma. */
1304 COSTS_N_INSNS (3), /* addsub. */
1305 COSTS_N_INSNS (3), /* fpconst. */
1306 COSTS_N_INSNS (3), /* neg. */
1307 COSTS_N_INSNS (3), /* compare. */
1308 COSTS_N_INSNS (3), /* widen. */
1309 COSTS_N_INSNS (3), /* narrow. */
1310 COSTS_N_INSNS (3), /* toint. */
1311 COSTS_N_INSNS (3), /* fromint. */
1312 COSTS_N_INSNS (3) /* roundint. */
1317 COSTS_N_INSNS (1) /* alu. */
1322 const struct cpu_cost_table cortexa7_extra_costs
=
1328 COSTS_N_INSNS (1), /* shift. */
1329 COSTS_N_INSNS (1), /* shift_reg. */
1330 COSTS_N_INSNS (1), /* arith_shift. */
1331 COSTS_N_INSNS (1), /* arith_shift_reg. */
1332 COSTS_N_INSNS (1), /* log_shift. */
1333 COSTS_N_INSNS (1), /* log_shift_reg. */
1334 COSTS_N_INSNS (1), /* extend. */
1335 COSTS_N_INSNS (1), /* extend_arith. */
1336 COSTS_N_INSNS (1), /* bfi. */
1337 COSTS_N_INSNS (1), /* bfx. */
1338 COSTS_N_INSNS (1), /* clz. */
1339 COSTS_N_INSNS (1), /* rev. */
1341 true /* non_exec_costs_exec. */
1348 COSTS_N_INSNS (1), /* flag_setting. */
1349 COSTS_N_INSNS (1), /* extend. */
1350 COSTS_N_INSNS (1), /* add. */
1351 COSTS_N_INSNS (1), /* extend_add. */
1352 COSTS_N_INSNS (7) /* idiv. */
1356 0, /* simple (N/A). */
1357 0, /* flag_setting (N/A). */
1358 COSTS_N_INSNS (1), /* extend. */
1360 COSTS_N_INSNS (2), /* extend_add. */
1366 COSTS_N_INSNS (1), /* load. */
1367 COSTS_N_INSNS (1), /* load_sign_extend. */
1368 COSTS_N_INSNS (3), /* ldrd. */
1369 COSTS_N_INSNS (1), /* ldm_1st. */
1370 1, /* ldm_regs_per_insn_1st. */
1371 2, /* ldm_regs_per_insn_subsequent. */
1372 COSTS_N_INSNS (2), /* loadf. */
1373 COSTS_N_INSNS (2), /* loadd. */
1374 COSTS_N_INSNS (1), /* load_unaligned. */
1375 COSTS_N_INSNS (1), /* store. */
1376 COSTS_N_INSNS (3), /* strd. */
1377 COSTS_N_INSNS (1), /* stm_1st. */
1378 1, /* stm_regs_per_insn_1st. */
1379 2, /* stm_regs_per_insn_subsequent. */
1380 COSTS_N_INSNS (2), /* storef. */
1381 COSTS_N_INSNS (2), /* stored. */
1382 COSTS_N_INSNS (1), /* store_unaligned. */
1383 COSTS_N_INSNS (1), /* loadv. */
1384 COSTS_N_INSNS (1) /* storev. */
1389 COSTS_N_INSNS (15), /* div. */
1390 COSTS_N_INSNS (3), /* mult. */
1391 COSTS_N_INSNS (7), /* mult_addsub. */
1392 COSTS_N_INSNS (7), /* fma. */
1393 COSTS_N_INSNS (3), /* addsub. */
1394 COSTS_N_INSNS (3), /* fpconst. */
1395 COSTS_N_INSNS (3), /* neg. */
1396 COSTS_N_INSNS (3), /* compare. */
1397 COSTS_N_INSNS (3), /* widen. */
1398 COSTS_N_INSNS (3), /* narrow. */
1399 COSTS_N_INSNS (3), /* toint. */
1400 COSTS_N_INSNS (3), /* fromint. */
1401 COSTS_N_INSNS (3) /* roundint. */
1405 COSTS_N_INSNS (30), /* div. */
1406 COSTS_N_INSNS (6), /* mult. */
1407 COSTS_N_INSNS (10), /* mult_addsub. */
1408 COSTS_N_INSNS (7), /* fma. */
1409 COSTS_N_INSNS (3), /* addsub. */
1410 COSTS_N_INSNS (3), /* fpconst. */
1411 COSTS_N_INSNS (3), /* neg. */
1412 COSTS_N_INSNS (3), /* compare. */
1413 COSTS_N_INSNS (3), /* widen. */
1414 COSTS_N_INSNS (3), /* narrow. */
1415 COSTS_N_INSNS (3), /* toint. */
1416 COSTS_N_INSNS (3), /* fromint. */
1417 COSTS_N_INSNS (3) /* roundint. */
1422 COSTS_N_INSNS (1) /* alu. */
1426 const struct cpu_cost_table cortexa12_extra_costs
=
1433 COSTS_N_INSNS (1), /* shift_reg. */
1434 COSTS_N_INSNS (1), /* arith_shift. */
1435 COSTS_N_INSNS (1), /* arith_shift_reg. */
1436 COSTS_N_INSNS (1), /* log_shift. */
1437 COSTS_N_INSNS (1), /* log_shift_reg. */
1439 COSTS_N_INSNS (1), /* extend_arith. */
1441 COSTS_N_INSNS (1), /* bfx. */
1442 COSTS_N_INSNS (1), /* clz. */
1443 COSTS_N_INSNS (1), /* rev. */
1445 true /* non_exec_costs_exec. */
1450 COSTS_N_INSNS (2), /* simple. */
1451 COSTS_N_INSNS (3), /* flag_setting. */
1452 COSTS_N_INSNS (2), /* extend. */
1453 COSTS_N_INSNS (3), /* add. */
1454 COSTS_N_INSNS (2), /* extend_add. */
1455 COSTS_N_INSNS (18) /* idiv. */
1459 0, /* simple (N/A). */
1460 0, /* flag_setting (N/A). */
1461 COSTS_N_INSNS (3), /* extend. */
1463 COSTS_N_INSNS (3), /* extend_add. */
1469 COSTS_N_INSNS (3), /* load. */
1470 COSTS_N_INSNS (3), /* load_sign_extend. */
1471 COSTS_N_INSNS (3), /* ldrd. */
1472 COSTS_N_INSNS (3), /* ldm_1st. */
1473 1, /* ldm_regs_per_insn_1st. */
1474 2, /* ldm_regs_per_insn_subsequent. */
1475 COSTS_N_INSNS (3), /* loadf. */
1476 COSTS_N_INSNS (3), /* loadd. */
1477 0, /* load_unaligned. */
1481 1, /* stm_regs_per_insn_1st. */
1482 2, /* stm_regs_per_insn_subsequent. */
1483 COSTS_N_INSNS (2), /* storef. */
1484 COSTS_N_INSNS (2), /* stored. */
1485 0, /* store_unaligned. */
1486 COSTS_N_INSNS (1), /* loadv. */
1487 COSTS_N_INSNS (1) /* storev. */
1492 COSTS_N_INSNS (17), /* div. */
1493 COSTS_N_INSNS (4), /* mult. */
1494 COSTS_N_INSNS (8), /* mult_addsub. */
1495 COSTS_N_INSNS (8), /* fma. */
1496 COSTS_N_INSNS (4), /* addsub. */
1497 COSTS_N_INSNS (2), /* fpconst. */
1498 COSTS_N_INSNS (2), /* neg. */
1499 COSTS_N_INSNS (2), /* compare. */
1500 COSTS_N_INSNS (4), /* widen. */
1501 COSTS_N_INSNS (4), /* narrow. */
1502 COSTS_N_INSNS (4), /* toint. */
1503 COSTS_N_INSNS (4), /* fromint. */
1504 COSTS_N_INSNS (4) /* roundint. */
1508 COSTS_N_INSNS (31), /* div. */
1509 COSTS_N_INSNS (4), /* mult. */
1510 COSTS_N_INSNS (8), /* mult_addsub. */
1511 COSTS_N_INSNS (8), /* fma. */
1512 COSTS_N_INSNS (4), /* addsub. */
1513 COSTS_N_INSNS (2), /* fpconst. */
1514 COSTS_N_INSNS (2), /* neg. */
1515 COSTS_N_INSNS (2), /* compare. */
1516 COSTS_N_INSNS (4), /* widen. */
1517 COSTS_N_INSNS (4), /* narrow. */
1518 COSTS_N_INSNS (4), /* toint. */
1519 COSTS_N_INSNS (4), /* fromint. */
1520 COSTS_N_INSNS (4) /* roundint. */
1525 COSTS_N_INSNS (1) /* alu. */
1529 const struct cpu_cost_table cortexa15_extra_costs
=
1537 COSTS_N_INSNS (1), /* arith_shift. */
1538 COSTS_N_INSNS (1), /* arith_shift_reg. */
1539 COSTS_N_INSNS (1), /* log_shift. */
1540 COSTS_N_INSNS (1), /* log_shift_reg. */
1542 COSTS_N_INSNS (1), /* extend_arith. */
1543 COSTS_N_INSNS (1), /* bfi. */
1548 true /* non_exec_costs_exec. */
1553 COSTS_N_INSNS (2), /* simple. */
1554 COSTS_N_INSNS (3), /* flag_setting. */
1555 COSTS_N_INSNS (2), /* extend. */
1556 COSTS_N_INSNS (2), /* add. */
1557 COSTS_N_INSNS (2), /* extend_add. */
1558 COSTS_N_INSNS (18) /* idiv. */
1562 0, /* simple (N/A). */
1563 0, /* flag_setting (N/A). */
1564 COSTS_N_INSNS (3), /* extend. */
1566 COSTS_N_INSNS (3), /* extend_add. */
1572 COSTS_N_INSNS (3), /* load. */
1573 COSTS_N_INSNS (3), /* load_sign_extend. */
1574 COSTS_N_INSNS (3), /* ldrd. */
1575 COSTS_N_INSNS (4), /* ldm_1st. */
1576 1, /* ldm_regs_per_insn_1st. */
1577 2, /* ldm_regs_per_insn_subsequent. */
1578 COSTS_N_INSNS (4), /* loadf. */
1579 COSTS_N_INSNS (4), /* loadd. */
1580 0, /* load_unaligned. */
1583 COSTS_N_INSNS (1), /* stm_1st. */
1584 1, /* stm_regs_per_insn_1st. */
1585 2, /* stm_regs_per_insn_subsequent. */
1588 0, /* store_unaligned. */
1589 COSTS_N_INSNS (1), /* loadv. */
1590 COSTS_N_INSNS (1) /* storev. */
1595 COSTS_N_INSNS (17), /* div. */
1596 COSTS_N_INSNS (4), /* mult. */
1597 COSTS_N_INSNS (8), /* mult_addsub. */
1598 COSTS_N_INSNS (8), /* fma. */
1599 COSTS_N_INSNS (4), /* addsub. */
1600 COSTS_N_INSNS (2), /* fpconst. */
1601 COSTS_N_INSNS (2), /* neg. */
1602 COSTS_N_INSNS (5), /* compare. */
1603 COSTS_N_INSNS (4), /* widen. */
1604 COSTS_N_INSNS (4), /* narrow. */
1605 COSTS_N_INSNS (4), /* toint. */
1606 COSTS_N_INSNS (4), /* fromint. */
1607 COSTS_N_INSNS (4) /* roundint. */
1611 COSTS_N_INSNS (31), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1628 COSTS_N_INSNS (1) /* alu. */
1632 const struct cpu_cost_table v7m_extra_costs
=
1640 0, /* arith_shift. */
1641 COSTS_N_INSNS (1), /* arith_shift_reg. */
1643 COSTS_N_INSNS (1), /* log_shift_reg. */
1645 COSTS_N_INSNS (1), /* extend_arith. */
1650 COSTS_N_INSNS (1), /* non_exec. */
1651 false /* non_exec_costs_exec. */
1656 COSTS_N_INSNS (1), /* simple. */
1657 COSTS_N_INSNS (1), /* flag_setting. */
1658 COSTS_N_INSNS (2), /* extend. */
1659 COSTS_N_INSNS (1), /* add. */
1660 COSTS_N_INSNS (3), /* extend_add. */
1661 COSTS_N_INSNS (8) /* idiv. */
1665 0, /* simple (N/A). */
1666 0, /* flag_setting (N/A). */
1667 COSTS_N_INSNS (2), /* extend. */
1669 COSTS_N_INSNS (3), /* extend_add. */
1675 COSTS_N_INSNS (2), /* load. */
1676 0, /* load_sign_extend. */
1677 COSTS_N_INSNS (3), /* ldrd. */
1678 COSTS_N_INSNS (2), /* ldm_1st. */
1679 1, /* ldm_regs_per_insn_1st. */
1680 1, /* ldm_regs_per_insn_subsequent. */
1681 COSTS_N_INSNS (2), /* loadf. */
1682 COSTS_N_INSNS (3), /* loadd. */
1683 COSTS_N_INSNS (1), /* load_unaligned. */
1684 COSTS_N_INSNS (2), /* store. */
1685 COSTS_N_INSNS (3), /* strd. */
1686 COSTS_N_INSNS (2), /* stm_1st. */
1687 1, /* stm_regs_per_insn_1st. */
1688 1, /* stm_regs_per_insn_subsequent. */
1689 COSTS_N_INSNS (2), /* storef. */
1690 COSTS_N_INSNS (3), /* stored. */
1691 COSTS_N_INSNS (1), /* store_unaligned. */
1692 COSTS_N_INSNS (1), /* loadv. */
1693 COSTS_N_INSNS (1) /* storev. */
1698 COSTS_N_INSNS (7), /* div. */
1699 COSTS_N_INSNS (2), /* mult. */
1700 COSTS_N_INSNS (5), /* mult_addsub. */
1701 COSTS_N_INSNS (3), /* fma. */
1702 COSTS_N_INSNS (1), /* addsub. */
1714 COSTS_N_INSNS (15), /* div. */
1715 COSTS_N_INSNS (5), /* mult. */
1716 COSTS_N_INSNS (7), /* mult_addsub. */
1717 COSTS_N_INSNS (7), /* fma. */
1718 COSTS_N_INSNS (3), /* addsub. */
1731 COSTS_N_INSNS (1) /* alu. */
1735 const struct tune_params arm_slowmul_tune
=
1737 &generic_extra_costs
, /* Insn extra costs. */
1738 NULL
, /* Sched adj cost. */
1739 arm_default_branch_cost
,
1740 &arm_default_vec_cost
,
1741 3, /* Constant limit. */
1742 5, /* Max cond insns. */
1743 8, /* Memset max inline. */
1744 1, /* Issue rate. */
1745 ARM_PREFETCH_NOT_BENEFICIAL
,
1746 tune_params::PREF_CONST_POOL_TRUE
,
1747 tune_params::PREF_LDRD_FALSE
,
1748 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1750 tune_params::DISPARAGE_FLAGS_NEITHER
,
1751 tune_params::PREF_NEON_64_FALSE
,
1752 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1753 tune_params::FUSE_NOTHING
,
1754 tune_params::SCHED_AUTOPREF_OFF
1757 const struct tune_params arm_fastmul_tune
=
1759 &generic_extra_costs
, /* Insn extra costs. */
1760 NULL
, /* Sched adj cost. */
1761 arm_default_branch_cost
,
1762 &arm_default_vec_cost
,
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 8, /* Memset max inline. */
1766 1, /* Issue rate. */
1767 ARM_PREFETCH_NOT_BENEFICIAL
,
1768 tune_params::PREF_CONST_POOL_TRUE
,
1769 tune_params::PREF_LDRD_FALSE
,
1770 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1771 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1772 tune_params::DISPARAGE_FLAGS_NEITHER
,
1773 tune_params::PREF_NEON_64_FALSE
,
1774 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1775 tune_params::FUSE_NOTHING
,
1776 tune_params::SCHED_AUTOPREF_OFF
1779 /* StrongARM has early execution of branches, so a sequence that is worth
1780 skipping is shorter. Set max_insns_skipped to a lower value. */
1782 const struct tune_params arm_strongarm_tune
=
1784 &generic_extra_costs
, /* Insn extra costs. */
1785 NULL
, /* Sched adj cost. */
1786 arm_default_branch_cost
,
1787 &arm_default_vec_cost
,
1788 1, /* Constant limit. */
1789 3, /* Max cond insns. */
1790 8, /* Memset max inline. */
1791 1, /* Issue rate. */
1792 ARM_PREFETCH_NOT_BENEFICIAL
,
1793 tune_params::PREF_CONST_POOL_TRUE
,
1794 tune_params::PREF_LDRD_FALSE
,
1795 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1796 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1797 tune_params::DISPARAGE_FLAGS_NEITHER
,
1798 tune_params::PREF_NEON_64_FALSE
,
1799 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1800 tune_params::FUSE_NOTHING
,
1801 tune_params::SCHED_AUTOPREF_OFF
1804 const struct tune_params arm_xscale_tune
=
1806 &generic_extra_costs
, /* Insn extra costs. */
1807 xscale_sched_adjust_cost
,
1808 arm_default_branch_cost
,
1809 &arm_default_vec_cost
,
1810 2, /* Constant limit. */
1811 3, /* Max cond insns. */
1812 8, /* Memset max inline. */
1813 1, /* Issue rate. */
1814 ARM_PREFETCH_NOT_BENEFICIAL
,
1815 tune_params::PREF_CONST_POOL_TRUE
,
1816 tune_params::PREF_LDRD_FALSE
,
1817 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1819 tune_params::DISPARAGE_FLAGS_NEITHER
,
1820 tune_params::PREF_NEON_64_FALSE
,
1821 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1822 tune_params::FUSE_NOTHING
,
1823 tune_params::SCHED_AUTOPREF_OFF
1826 const struct tune_params arm_9e_tune
=
1828 &generic_extra_costs
, /* Insn extra costs. */
1829 NULL
, /* Sched adj cost. */
1830 arm_default_branch_cost
,
1831 &arm_default_vec_cost
,
1832 1, /* Constant limit. */
1833 5, /* Max cond insns. */
1834 8, /* Memset max inline. */
1835 1, /* Issue rate. */
1836 ARM_PREFETCH_NOT_BENEFICIAL
,
1837 tune_params::PREF_CONST_POOL_TRUE
,
1838 tune_params::PREF_LDRD_FALSE
,
1839 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1840 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1841 tune_params::DISPARAGE_FLAGS_NEITHER
,
1842 tune_params::PREF_NEON_64_FALSE
,
1843 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1844 tune_params::FUSE_NOTHING
,
1845 tune_params::SCHED_AUTOPREF_OFF
1848 const struct tune_params arm_marvell_pj4_tune
=
1850 &generic_extra_costs
, /* Insn extra costs. */
1851 NULL
, /* Sched adj cost. */
1852 arm_default_branch_cost
,
1853 &arm_default_vec_cost
,
1854 1, /* Constant limit. */
1855 5, /* Max cond insns. */
1856 8, /* Memset max inline. */
1857 2, /* Issue rate. */
1858 ARM_PREFETCH_NOT_BENEFICIAL
,
1859 tune_params::PREF_CONST_POOL_TRUE
,
1860 tune_params::PREF_LDRD_FALSE
,
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1863 tune_params::DISPARAGE_FLAGS_NEITHER
,
1864 tune_params::PREF_NEON_64_FALSE
,
1865 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1866 tune_params::FUSE_NOTHING
,
1867 tune_params::SCHED_AUTOPREF_OFF
1870 const struct tune_params arm_v6t2_tune
=
1872 &generic_extra_costs
, /* Insn extra costs. */
1873 NULL
, /* Sched adj cost. */
1874 arm_default_branch_cost
,
1875 &arm_default_vec_cost
,
1876 1, /* Constant limit. */
1877 5, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 1, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL
,
1881 tune_params::PREF_CONST_POOL_FALSE
,
1882 tune_params::PREF_LDRD_FALSE
,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER
,
1886 tune_params::PREF_NEON_64_FALSE
,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1888 tune_params::FUSE_NOTHING
,
1889 tune_params::SCHED_AUTOPREF_OFF
1893 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1894 const struct tune_params arm_cortex_tune
=
1896 &generic_extra_costs
,
1897 NULL
, /* Sched adj cost. */
1898 arm_default_branch_cost
,
1899 &arm_default_vec_cost
,
1900 1, /* Constant limit. */
1901 5, /* Max cond insns. */
1902 8, /* Memset max inline. */
1903 2, /* Issue rate. */
1904 ARM_PREFETCH_NOT_BENEFICIAL
,
1905 tune_params::PREF_CONST_POOL_FALSE
,
1906 tune_params::PREF_LDRD_FALSE
,
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1909 tune_params::DISPARAGE_FLAGS_NEITHER
,
1910 tune_params::PREF_NEON_64_FALSE
,
1911 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1912 tune_params::FUSE_NOTHING
,
1913 tune_params::SCHED_AUTOPREF_OFF
1916 const struct tune_params arm_cortex_a8_tune
=
1918 &cortexa8_extra_costs
,
1919 NULL
, /* Sched adj cost. */
1920 arm_default_branch_cost
,
1921 &arm_default_vec_cost
,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL
,
1927 tune_params::PREF_CONST_POOL_FALSE
,
1928 tune_params::PREF_LDRD_FALSE
,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER
,
1932 tune_params::PREF_NEON_64_FALSE
,
1933 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1934 tune_params::FUSE_NOTHING
,
1935 tune_params::SCHED_AUTOPREF_OFF
1938 const struct tune_params arm_cortex_a7_tune
=
1940 &cortexa7_extra_costs
,
1941 NULL
, /* Sched adj cost. */
1942 arm_default_branch_cost
,
1943 &arm_default_vec_cost
,
1944 1, /* Constant limit. */
1945 5, /* Max cond insns. */
1946 8, /* Memset max inline. */
1947 2, /* Issue rate. */
1948 ARM_PREFETCH_NOT_BENEFICIAL
,
1949 tune_params::PREF_CONST_POOL_FALSE
,
1950 tune_params::PREF_LDRD_FALSE
,
1951 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1953 tune_params::DISPARAGE_FLAGS_NEITHER
,
1954 tune_params::PREF_NEON_64_FALSE
,
1955 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1956 tune_params::FUSE_NOTHING
,
1957 tune_params::SCHED_AUTOPREF_OFF
1960 const struct tune_params arm_cortex_a15_tune
=
1962 &cortexa15_extra_costs
,
1963 NULL
, /* Sched adj cost. */
1964 arm_default_branch_cost
,
1965 &arm_default_vec_cost
,
1966 1, /* Constant limit. */
1967 2, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 3, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL
,
1971 tune_params::PREF_CONST_POOL_FALSE
,
1972 tune_params::PREF_LDRD_TRUE
,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_ALL
,
1976 tune_params::PREF_NEON_64_FALSE
,
1977 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1978 tune_params::FUSE_NOTHING
,
1979 tune_params::SCHED_AUTOPREF_FULL
1982 const struct tune_params arm_cortex_a35_tune
=
1984 &cortexa53_extra_costs
,
1985 NULL
, /* Sched adj cost. */
1986 arm_default_branch_cost
,
1987 &arm_default_vec_cost
,
1988 1, /* Constant limit. */
1989 5, /* Max cond insns. */
1990 8, /* Memset max inline. */
1991 1, /* Issue rate. */
1992 ARM_PREFETCH_NOT_BENEFICIAL
,
1993 tune_params::PREF_CONST_POOL_FALSE
,
1994 tune_params::PREF_LDRD_FALSE
,
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1997 tune_params::DISPARAGE_FLAGS_NEITHER
,
1998 tune_params::PREF_NEON_64_FALSE
,
1999 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2000 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2001 tune_params::SCHED_AUTOPREF_OFF
2004 const struct tune_params arm_cortex_a53_tune
=
2006 &cortexa53_extra_costs
,
2007 NULL
, /* Sched adj cost. */
2008 arm_default_branch_cost
,
2009 &arm_default_vec_cost
,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL
,
2015 tune_params::PREF_CONST_POOL_FALSE
,
2016 tune_params::PREF_LDRD_FALSE
,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER
,
2020 tune_params::PREF_NEON_64_FALSE
,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2022 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2023 tune_params::SCHED_AUTOPREF_OFF
2026 const struct tune_params arm_cortex_a57_tune
=
2028 &cortexa57_extra_costs
,
2029 NULL
, /* Sched adj cost. */
2030 arm_default_branch_cost
,
2031 &arm_default_vec_cost
,
2032 1, /* Constant limit. */
2033 2, /* Max cond insns. */
2034 8, /* Memset max inline. */
2035 3, /* Issue rate. */
2036 ARM_PREFETCH_NOT_BENEFICIAL
,
2037 tune_params::PREF_CONST_POOL_FALSE
,
2038 tune_params::PREF_LDRD_TRUE
,
2039 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2041 tune_params::DISPARAGE_FLAGS_ALL
,
2042 tune_params::PREF_NEON_64_FALSE
,
2043 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2044 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2045 tune_params::SCHED_AUTOPREF_FULL
2048 const struct tune_params arm_exynosm1_tune
=
2050 &exynosm1_extra_costs
,
2051 NULL
, /* Sched adj cost. */
2052 arm_default_branch_cost
,
2053 &arm_default_vec_cost
,
2054 1, /* Constant limit. */
2055 2, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 3, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL
,
2059 tune_params::PREF_CONST_POOL_FALSE
,
2060 tune_params::PREF_LDRD_TRUE
,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_ALL
,
2064 tune_params::PREF_NEON_64_FALSE
,
2065 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2066 tune_params::FUSE_NOTHING
,
2067 tune_params::SCHED_AUTOPREF_OFF
2070 const struct tune_params arm_xgene1_tune
=
2072 &xgene1_extra_costs
,
2073 NULL
, /* Sched adj cost. */
2074 arm_default_branch_cost
,
2075 &arm_default_vec_cost
,
2076 1, /* Constant limit. */
2077 2, /* Max cond insns. */
2078 32, /* Memset max inline. */
2079 4, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL
,
2081 tune_params::PREF_CONST_POOL_FALSE
,
2082 tune_params::PREF_LDRD_TRUE
,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_ALL
,
2086 tune_params::PREF_NEON_64_FALSE
,
2087 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2088 tune_params::FUSE_NOTHING
,
2089 tune_params::SCHED_AUTOPREF_OFF
2092 const struct tune_params arm_qdf24xx_tune
=
2094 &qdf24xx_extra_costs
,
2095 NULL
, /* Scheduler cost adjustment. */
2096 arm_default_branch_cost
,
2097 &arm_default_vec_cost
, /* Vectorizer costs. */
2098 1, /* Constant limit. */
2099 2, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 4, /* Issue rate. */
2102 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2103 tune_params::PREF_CONST_POOL_FALSE
,
2104 tune_params::PREF_LDRD_TRUE
,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_ALL
,
2108 tune_params::PREF_NEON_64_FALSE
,
2109 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2110 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2111 tune_params::SCHED_AUTOPREF_FULL
2114 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2115 less appealing. Set max_insns_skipped to a low value. */
2117 const struct tune_params arm_cortex_a5_tune
=
2119 &cortexa5_extra_costs
,
2120 NULL
, /* Sched adj cost. */
2121 arm_cortex_a5_branch_cost
,
2122 &arm_default_vec_cost
,
2123 1, /* Constant limit. */
2124 1, /* Max cond insns. */
2125 8, /* Memset max inline. */
2126 2, /* Issue rate. */
2127 ARM_PREFETCH_NOT_BENEFICIAL
,
2128 tune_params::PREF_CONST_POOL_FALSE
,
2129 tune_params::PREF_LDRD_FALSE
,
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2132 tune_params::DISPARAGE_FLAGS_NEITHER
,
2133 tune_params::PREF_NEON_64_FALSE
,
2134 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2135 tune_params::FUSE_NOTHING
,
2136 tune_params::SCHED_AUTOPREF_OFF
2139 const struct tune_params arm_cortex_a9_tune
=
2141 &cortexa9_extra_costs
,
2142 cortex_a9_sched_adjust_cost
,
2143 arm_default_branch_cost
,
2144 &arm_default_vec_cost
,
2145 1, /* Constant limit. */
2146 5, /* Max cond insns. */
2147 8, /* Memset max inline. */
2148 2, /* Issue rate. */
2149 ARM_PREFETCH_BENEFICIAL(4,32,32),
2150 tune_params::PREF_CONST_POOL_FALSE
,
2151 tune_params::PREF_LDRD_FALSE
,
2152 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2153 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2154 tune_params::DISPARAGE_FLAGS_NEITHER
,
2155 tune_params::PREF_NEON_64_FALSE
,
2156 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2157 tune_params::FUSE_NOTHING
,
2158 tune_params::SCHED_AUTOPREF_OFF
2161 const struct tune_params arm_cortex_a12_tune
=
2163 &cortexa12_extra_costs
,
2164 NULL
, /* Sched adj cost. */
2165 arm_default_branch_cost
,
2166 &arm_default_vec_cost
, /* Vectorizer costs. */
2167 1, /* Constant limit. */
2168 2, /* Max cond insns. */
2169 8, /* Memset max inline. */
2170 2, /* Issue rate. */
2171 ARM_PREFETCH_NOT_BENEFICIAL
,
2172 tune_params::PREF_CONST_POOL_FALSE
,
2173 tune_params::PREF_LDRD_TRUE
,
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2175 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2176 tune_params::DISPARAGE_FLAGS_ALL
,
2177 tune_params::PREF_NEON_64_FALSE
,
2178 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2179 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2180 tune_params::SCHED_AUTOPREF_OFF
2183 const struct tune_params arm_cortex_a73_tune
=
2185 &cortexa57_extra_costs
,
2186 NULL
, /* Sched adj cost. */
2187 arm_default_branch_cost
,
2188 &arm_default_vec_cost
, /* Vectorizer costs. */
2189 1, /* Constant limit. */
2190 2, /* Max cond insns. */
2191 8, /* Memset max inline. */
2192 2, /* Issue rate. */
2193 ARM_PREFETCH_NOT_BENEFICIAL
,
2194 tune_params::PREF_CONST_POOL_FALSE
,
2195 tune_params::PREF_LDRD_TRUE
,
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2198 tune_params::DISPARAGE_FLAGS_ALL
,
2199 tune_params::PREF_NEON_64_FALSE
,
2200 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2201 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2202 tune_params::SCHED_AUTOPREF_FULL
2205 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2206 cycle to execute each. An LDR from the constant pool also takes two cycles
2207 to execute, but mildly increases pipelining opportunity (consecutive
2208 loads/stores can be pipelined together, saving one cycle), and may also
2209 improve icache utilisation. Hence we prefer the constant pool for such
2212 const struct tune_params arm_v7m_tune
=
2215 NULL
, /* Sched adj cost. */
2216 arm_cortex_m_branch_cost
,
2217 &arm_default_vec_cost
,
2218 1, /* Constant limit. */
2219 2, /* Max cond insns. */
2220 8, /* Memset max inline. */
2221 1, /* Issue rate. */
2222 ARM_PREFETCH_NOT_BENEFICIAL
,
2223 tune_params::PREF_CONST_POOL_TRUE
,
2224 tune_params::PREF_LDRD_FALSE
,
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2226 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2227 tune_params::DISPARAGE_FLAGS_NEITHER
,
2228 tune_params::PREF_NEON_64_FALSE
,
2229 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2230 tune_params::FUSE_NOTHING
,
2231 tune_params::SCHED_AUTOPREF_OFF
2234 /* Cortex-M7 tuning. */
2236 const struct tune_params arm_cortex_m7_tune
=
2239 NULL
, /* Sched adj cost. */
2240 arm_cortex_m7_branch_cost
,
2241 &arm_default_vec_cost
,
2242 0, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL
,
2247 tune_params::PREF_CONST_POOL_TRUE
,
2248 tune_params::PREF_LDRD_FALSE
,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER
,
2252 tune_params::PREF_NEON_64_FALSE
,
2253 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2254 tune_params::FUSE_NOTHING
,
2255 tune_params::SCHED_AUTOPREF_OFF
2258 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2259 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2261 const struct tune_params arm_v6m_tune
=
2263 &generic_extra_costs
, /* Insn extra costs. */
2264 NULL
, /* Sched adj cost. */
2265 arm_default_branch_cost
,
2266 &arm_default_vec_cost
, /* Vectorizer costs. */
2267 1, /* Constant limit. */
2268 5, /* Max cond insns. */
2269 8, /* Memset max inline. */
2270 1, /* Issue rate. */
2271 ARM_PREFETCH_NOT_BENEFICIAL
,
2272 tune_params::PREF_CONST_POOL_FALSE
,
2273 tune_params::PREF_LDRD_FALSE
,
2274 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2275 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2276 tune_params::DISPARAGE_FLAGS_NEITHER
,
2277 tune_params::PREF_NEON_64_FALSE
,
2278 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2279 tune_params::FUSE_NOTHING
,
2280 tune_params::SCHED_AUTOPREF_OFF
2283 const struct tune_params arm_fa726te_tune
=
2285 &generic_extra_costs
, /* Insn extra costs. */
2286 fa726te_sched_adjust_cost
,
2287 arm_default_branch_cost
,
2288 &arm_default_vec_cost
,
2289 1, /* Constant limit. */
2290 5, /* Max cond insns. */
2291 8, /* Memset max inline. */
2292 2, /* Issue rate. */
2293 ARM_PREFETCH_NOT_BENEFICIAL
,
2294 tune_params::PREF_CONST_POOL_TRUE
,
2295 tune_params::PREF_LDRD_FALSE
,
2296 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2297 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2298 tune_params::DISPARAGE_FLAGS_NEITHER
,
2299 tune_params::PREF_NEON_64_FALSE
,
2300 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2301 tune_params::FUSE_NOTHING
,
2302 tune_params::SCHED_AUTOPREF_OFF
2305 /* Auto-generated CPU, FPU and architecture tables. */
2306 #include "arm-cpu-data.h"
/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2314 /* Supported TLS relocations. */
2322 TLS_DESCSEQ
/* GNU scheme */
2325 /* The maximum number of insns to be used when loading a constant. */
2327 arm_constant_limit (bool size_p
)
2329 return size_p
? 1 : current_tune
->constant_limit
;
2332 /* Emit an insn that's a simple single-set. Both the operands must be known
2334 inline static rtx_insn
*
2335 emit_set_insn (rtx x
, rtx y
)
2337 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE.  Uses Kernighan's trick:
   each iteration clears the least-significant set bit, so the loop
   runs once per set bit rather than once per bit position.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;	/* Clear the least-significant set bit.  */
    }

  return count;
}
2355 /* Return the number of bits set in BMAP. */
2357 bitmap_popcount (const sbitmap bmap
)
2359 unsigned int count
= 0;
2361 sbitmap_iterator sbi
;
2363 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2372 } arm_fixed_mode_set
;
2374 /* A small helper for setting fixed-point library libfuncs. */
2377 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2378 const char *funcname
, const char *modename
,
2383 if (num_suffix
== 0)
2384 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2386 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2388 set_optab_libfunc (optable
, mode
, buffer
);
2392 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2393 machine_mode from
, const char *funcname
,
2394 const char *toname
, const char *fromname
)
2397 const char *maybe_suffix_2
= "";
2399 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2400 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2401 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2402 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2403 maybe_suffix_2
= "2";
2405 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2408 set_conv_libfunc (optable
, to
, from
, buffer
);
2411 /* Set up library functions unique to ARM. */
2414 arm_init_libfuncs (void)
2416 /* For Linux, we have access to kernel support for atomic operations. */
2417 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2418 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2420 /* There are no special library functions unless we are using the
2425 /* The functions below are described in Section 4 of the "Run-Time
2426 ABI for the ARM architecture", Version 1.0. */
2428 /* Double-precision floating-point arithmetic. Table 2. */
2429 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2430 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2431 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2432 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2433 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2435 /* Double-precision comparisons. Table 3. */
2436 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2437 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2438 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2439 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2440 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2441 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2442 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2444 /* Single-precision floating-point arithmetic. Table 4. */
2445 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2446 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2447 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2448 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2449 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2451 /* Single-precision comparisons. Table 5. */
2452 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2453 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2454 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2455 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2456 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2457 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2458 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2460 /* Floating-point to integer conversions. Table 6. */
2461 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2462 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2463 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2464 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2465 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2466 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2467 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2468 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2470 /* Conversions between floating types. Table 7. */
2471 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2472 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2474 /* Integer to floating-point conversions. Table 8. */
2475 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2476 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2477 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2478 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2479 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2480 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2481 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2482 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2484 /* Long long. Table 9. */
2485 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2486 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2487 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2488 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2489 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2490 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2491 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2492 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2494 /* Integer (32/32->32) division. \S 4.3.1. */
2495 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2496 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2498 /* The divmod functions are designed so that they can be used for
2499 plain division, even though they return both the quotient and the
2500 remainder. The quotient is returned in the usual location (i.e.,
2501 r0 for SImode, {r0, r1} for DImode), just as would be expected
2502 for an ordinary division routine. Because the AAPCS calling
2503 conventions specify that all of { r0, r1, r2, r3 } are
2504 callee-saved registers, there is no need to tell the compiler
2505 explicitly that those registers are clobbered by these
2507 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2508 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2510 /* For SImode division the ABI provides div-without-mod routines,
2511 which are faster. */
2512 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2513 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2515 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2516 divmod libcalls instead. */
2517 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2518 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2519 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2520 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2522 /* Half-precision float operations. The compiler handles all operations
2523 with NULL libfuncs by converting the SFmode. */
2524 switch (arm_fp16_format
)
2526 case ARM_FP16_FORMAT_IEEE
:
2527 case ARM_FP16_FORMAT_ALTERNATIVE
:
2530 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2531 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2533 : "__gnu_f2h_alternative"));
2534 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2535 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2537 : "__gnu_h2f_alternative"));
2539 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2540 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2542 : "__gnu_d2h_alternative"));
2545 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2546 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2547 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2548 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2549 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2552 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2553 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2554 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2555 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2556 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2557 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2558 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2565 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2567 const arm_fixed_mode_set fixed_arith_modes
[] =
2588 const arm_fixed_mode_set fixed_conv_modes
[] =
2618 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2620 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2621 "add", fixed_arith_modes
[i
].name
, 3);
2622 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2623 "ssadd", fixed_arith_modes
[i
].name
, 3);
2624 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2625 "usadd", fixed_arith_modes
[i
].name
, 3);
2626 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2627 "sub", fixed_arith_modes
[i
].name
, 3);
2628 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2629 "sssub", fixed_arith_modes
[i
].name
, 3);
2630 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2631 "ussub", fixed_arith_modes
[i
].name
, 3);
2632 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2633 "mul", fixed_arith_modes
[i
].name
, 3);
2634 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2635 "ssmul", fixed_arith_modes
[i
].name
, 3);
2636 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2637 "usmul", fixed_arith_modes
[i
].name
, 3);
2638 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2639 "div", fixed_arith_modes
[i
].name
, 3);
2640 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2641 "udiv", fixed_arith_modes
[i
].name
, 3);
2642 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2643 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2644 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2645 "usdiv", fixed_arith_modes
[i
].name
, 3);
2646 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2647 "neg", fixed_arith_modes
[i
].name
, 2);
2648 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2649 "ssneg", fixed_arith_modes
[i
].name
, 2);
2650 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2651 "usneg", fixed_arith_modes
[i
].name
, 2);
2652 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2653 "ashl", fixed_arith_modes
[i
].name
, 3);
2654 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2655 "ashr", fixed_arith_modes
[i
].name
, 3);
2656 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2657 "lshr", fixed_arith_modes
[i
].name
, 3);
2658 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2659 "ssashl", fixed_arith_modes
[i
].name
, 3);
2660 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2661 "usashl", fixed_arith_modes
[i
].name
, 3);
2662 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2663 "cmp", fixed_arith_modes
[i
].name
, 2);
2666 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2667 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2670 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2671 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2674 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2675 fixed_conv_modes
[j
].mode
, "fract",
2676 fixed_conv_modes
[i
].name
,
2677 fixed_conv_modes
[j
].name
);
2678 arm_set_fixed_conv_libfunc (satfract_optab
,
2679 fixed_conv_modes
[i
].mode
,
2680 fixed_conv_modes
[j
].mode
, "satfract",
2681 fixed_conv_modes
[i
].name
,
2682 fixed_conv_modes
[j
].name
);
2683 arm_set_fixed_conv_libfunc (fractuns_optab
,
2684 fixed_conv_modes
[i
].mode
,
2685 fixed_conv_modes
[j
].mode
, "fractuns",
2686 fixed_conv_modes
[i
].name
,
2687 fixed_conv_modes
[j
].name
);
2688 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2689 fixed_conv_modes
[i
].mode
,
2690 fixed_conv_modes
[j
].mode
, "satfractuns",
2691 fixed_conv_modes
[i
].name
,
2692 fixed_conv_modes
[j
].name
);
2696 if (TARGET_AAPCS_BASED
)
2697 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2700 /* On AAPCS systems, this is the "struct __va_list". */
2701 static GTY(()) tree va_list_type
;
2703 /* Return the type to use as __builtin_va_list. */
2705 arm_build_builtin_va_list (void)
2710 if (!TARGET_AAPCS_BASED
)
2711 return std_build_builtin_va_list ();
2713 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2721 The C Library ABI further reinforces this definition in \S
2724 We must follow this definition exactly. The structure tag
2725 name is visible in C++ mangled names, and thus forms a part
2726 of the ABI. The field name may be used by people who
2727 #include <stdarg.h>. */
2728 /* Create the type. */
2729 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2730 /* Give it the required name. */
2731 va_list_name
= build_decl (BUILTINS_LOCATION
,
2733 get_identifier ("__va_list"),
2735 DECL_ARTIFICIAL (va_list_name
) = 1;
2736 TYPE_NAME (va_list_type
) = va_list_name
;
2737 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2738 /* Create the __ap field. */
2739 ap_field
= build_decl (BUILTINS_LOCATION
,
2741 get_identifier ("__ap"),
2743 DECL_ARTIFICIAL (ap_field
) = 1;
2744 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2745 TYPE_FIELDS (va_list_type
) = ap_field
;
2746 /* Compute its layout. */
2747 layout_type (va_list_type
);
2749 return va_list_type
;
2752 /* Return an expression of type "void *" pointing to the next
2753 available argument in a variable-argument list. VALIST is the
2754 user-level va_list object, of type __builtin_va_list. */
2756 arm_extract_valist_ptr (tree valist
)
2758 if (TREE_TYPE (valist
) == error_mark_node
)
2759 return error_mark_node
;
2761 /* On an AAPCS target, the pointer is stored within "struct
2763 if (TARGET_AAPCS_BASED
)
2765 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2766 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2767 valist
, ap_field
, NULL_TREE
);
2773 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2775 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2777 valist
= arm_extract_valist_ptr (valist
);
2778 std_expand_builtin_va_start (valist
, nextarg
);
2781 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2783 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2786 valist
= arm_extract_valist_ptr (valist
);
2787 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2790 /* Check any incompatible options that the user has specified. */
2792 arm_option_check_internal (struct gcc_options
*opts
)
2794 int flags
= opts
->x_target_flags
;
2796 /* iWMMXt and NEON are incompatible. */
2798 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_neon
))
2799 error ("iWMMXt and NEON are incompatible");
2801 /* Make sure that the processor choice does not conflict with any of the
2802 other command line choices. */
2803 if (TARGET_ARM_P (flags
)
2804 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
))
2805 error ("target CPU does not support ARM mode");
2807 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2808 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2809 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2811 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2812 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2814 /* If this target is normally configured to use APCS frames, warn if they
2815 are turned off and debugging is turned on. */
2816 if (TARGET_ARM_P (flags
)
2817 && write_symbols
!= NO_DEBUG
2818 && !TARGET_APCS_FRAME
2819 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2820 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2822 /* iWMMXt unsupported under Thumb mode. */
2823 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2824 error ("iWMMXt unsupported under Thumb mode");
2826 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2827 error ("can not use -mtp=cp15 with 16-bit Thumb");
2829 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2831 error ("RTP PIC is incompatible with Thumb");
2835 /* We only support -mslow-flash-data on armv7-m targets. */
2836 if (target_slow_flash_data
2837 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2838 || (TARGET_THUMB1_P (flags
) || flag_pic
|| TARGET_NEON
)))
2839 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2841 /* We only support pure-code on Thumb-2 M-profile targets. */
2842 if (target_pure_code
2843 && (!arm_arch_thumb2
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
))
2844 error ("-mpure-code only supports non-pic code on armv7-m targets");
2848 /* Recompute the global settings depending on target attribute options. */
2851 arm_option_params_internal (void)
2853 /* If we are not using the default (ARM mode) section anchor offset
2854 ranges, then set the correct ranges now. */
2857 /* Thumb-1 LDR instructions cannot have negative offsets.
2858 Permissible positive offset ranges are 5-bit (for byte loads),
2859 6-bit (for halfword loads), or 7-bit (for word loads).
2860 Empirical results suggest a 7-bit anchor range gives the best
2861 overall code size. */
2862 targetm
.min_anchor_offset
= 0;
2863 targetm
.max_anchor_offset
= 127;
2865 else if (TARGET_THUMB2
)
2867 /* The minimum is set such that the total size of the block
2868 for a particular anchor is 248 + 1 + 4095 bytes, which is
2869 divisible by eight, ensuring natural spacing of anchors. */
2870 targetm
.min_anchor_offset
= -248;
2871 targetm
.max_anchor_offset
= 4095;
2875 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2876 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2881 /* If optimizing for size, bump the number of instructions that we
2882 are prepared to conditionally execute (even on a StrongARM). */
2883 max_insns_skipped
= 6;
2885 /* For THUMB2, we limit the conditional sequence to one IT block. */
2887 max_insns_skipped
= arm_restrict_it
? 1 : 4;
2890 /* When -mrestrict-it is in use tone down the if-conversion. */
2891 max_insns_skipped
= (TARGET_THUMB2
&& arm_restrict_it
)
2892 ? 1 : current_tune
->max_insns_skipped
;
2895 /* True if -mflip-thumb should next add an attribute for the default
2896 mode, false if it should next add an attribute for the opposite mode. */
2897 static GTY(()) bool thumb_flipper
;
2899 /* Options after initial target override. */
2900 static GTY(()) tree init_optimize
;
2903 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2905 if (opts
->x_align_functions
<= 0)
2906 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2907 && opts
->x_optimize_size
? 2 : 4;
2910 /* Implement targetm.override_options_after_change. */
2913 arm_override_options_after_change (void)
2915 arm_configure_build_target (&arm_active_target
,
2916 TREE_TARGET_OPTION (target_option_default_node
),
2917 &global_options_set
, false);
2919 arm_override_options_after_change_1 (&global_options
);
2923 arm_option_restore (struct gcc_options
*, struct cl_target_option
*ptr
)
2925 arm_configure_build_target (&arm_active_target
, ptr
, &global_options_set
,
2929 /* Reset options between modes that the user has specified. */
2931 arm_option_override_internal (struct gcc_options
*opts
,
2932 struct gcc_options
*opts_set
)
2934 arm_override_options_after_change_1 (opts
);
2936 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2938 /* The default is to enable interworking, so this warning message would
2939 be confusing to users who have just compiled with, eg, -march=armv3. */
2940 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2941 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2944 if (TARGET_THUMB_P (opts
->x_target_flags
)
2945 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2947 warning (0, "target CPU does not support THUMB instructions");
2948 opts
->x_target_flags
&= ~MASK_THUMB
;
2951 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2953 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2954 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2957 /* Callee super interworking implies thumb interworking. Adding
2958 this to the flags here simplifies the logic elsewhere. */
2959 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2960 opts
->x_target_flags
|= MASK_INTERWORK
;
2962 /* need to remember initial values so combinaisons of options like
2963 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2964 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2966 if (! opts_set
->x_arm_restrict_it
)
2967 opts
->x_arm_restrict_it
= arm_arch8
;
2969 /* ARM execution state and M profile don't have [restrict] IT. */
2970 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
2971 opts
->x_arm_restrict_it
= 0;
2973 /* Enable -munaligned-access by default for
2974 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2975 i.e. Thumb2 and ARM state only.
2976 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2977 - ARMv8 architecture-base processors.
2979 Disable -munaligned-access by default for
2980 - all pre-ARMv6 architecture-based processors
2981 - ARMv6-M architecture-based processors
2982 - ARMv8-M Baseline processors. */
2984 if (! opts_set
->x_unaligned_access
)
2986 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
2987 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
2989 else if (opts
->x_unaligned_access
== 1
2990 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2992 warning (0, "target CPU does not support unaligned accesses");
2993 opts
->x_unaligned_access
= 0;
2996 /* Don't warn since it's on by default in -O2. */
2997 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2998 opts
->x_flag_schedule_insns
= 0;
3000 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3002 /* Disable shrink-wrap when optimizing function for size, since it tends to
3003 generate additional returns. */
3004 if (optimize_function_for_size_p (cfun
)
3005 && TARGET_THUMB2_P (opts
->x_target_flags
))
3006 opts
->x_flag_shrink_wrap
= false;
3008 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3010 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3011 - epilogue_insns - does not accurately model the corresponding insns
3012 emitted in the asm file. In particular, see the comment in thumb_exit
3013 'Find out how many of the (return) argument registers we can corrupt'.
3014 As a consequence, the epilogue may clobber registers without fipa-ra
3015 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3016 TODO: Accurately model clobbers for epilogue_insns and reenable
3018 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3019 opts
->x_flag_ipa_ra
= 0;
3021 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3023 /* Thumb2 inline assembly code should always use unified syntax.
3024 This will apply to ARM and Thumb1 eventually. */
3025 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3027 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3028 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3032 /* Convert a static initializer array of feature bits to sbitmap
3035 arm_initialize_isa (sbitmap isa
, const enum isa_feature
*isa_bits
)
3038 while (*isa_bits
!= isa_nobit
)
3039 bitmap_set_bit (isa
, *(isa_bits
++));
3042 static sbitmap isa_all_fpubits
;
3043 static sbitmap isa_quirkbits
;
3045 /* Configure a build target TARGET from the user-specified options OPTS and
3046 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3047 architecture have been specified, but the two are not identical. */
3049 arm_configure_build_target (struct arm_build_target
*target
,
3050 struct cl_target_option
*opts
,
3051 struct gcc_options
*opts_set
,
3052 bool warn_compatible
)
3054 const struct processors
*arm_selected_tune
= NULL
;
3055 const struct processors
*arm_selected_arch
= NULL
;
3056 const struct processors
*arm_selected_cpu
= NULL
;
3057 const struct arm_fpu_desc
*arm_selected_fpu
= NULL
;
3059 bitmap_clear (target
->isa
);
3060 target
->core_name
= NULL
;
3061 target
->arch_name
= NULL
;
3063 if (opts_set
->x_arm_arch_option
)
3064 arm_selected_arch
= &all_architectures
[opts
->x_arm_arch_option
];
3066 if (opts_set
->x_arm_cpu_option
)
3068 arm_selected_cpu
= &all_cores
[(int) opts
->x_arm_cpu_option
];
3069 arm_selected_tune
= &all_cores
[(int) opts
->x_arm_cpu_option
];
3072 if (opts_set
->x_arm_tune_option
)
3073 arm_selected_tune
= &all_cores
[(int) opts
->x_arm_tune_option
];
3075 if (arm_selected_arch
)
3077 arm_initialize_isa (target
->isa
, arm_selected_arch
->isa_bits
);
3079 if (arm_selected_cpu
)
3081 auto_sbitmap
cpu_isa (isa_num_bits
);
3083 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->isa_bits
);
3084 bitmap_xor (cpu_isa
, cpu_isa
, target
->isa
);
3085 /* Ignore any bits that are quirk bits. */
3086 bitmap_and_compl (cpu_isa
, cpu_isa
, isa_quirkbits
);
3087 /* Ignore (for now) any bits that might be set by -mfpu. */
3088 bitmap_and_compl (cpu_isa
, cpu_isa
, isa_all_fpubits
);
3090 if (!bitmap_empty_p (cpu_isa
))
3092 if (warn_compatible
)
3093 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3094 arm_selected_cpu
->name
, arm_selected_arch
->name
);
3095 /* -march wins for code generation.
3096 -mcpu wins for default tuning. */
3097 if (!arm_selected_tune
)
3098 arm_selected_tune
= arm_selected_cpu
;
3100 arm_selected_cpu
= arm_selected_arch
;
3104 /* Architecture and CPU are essentially the same.
3105 Prefer the CPU setting. */
3106 arm_selected_arch
= NULL
;
3109 target
->core_name
= arm_selected_cpu
->name
;
3113 /* Pick a CPU based on the architecture. */
3114 arm_selected_cpu
= arm_selected_arch
;
3115 target
->arch_name
= arm_selected_arch
->name
;
3116 /* Note: target->core_name is left unset in this path. */
3119 else if (arm_selected_cpu
)
3121 target
->core_name
= arm_selected_cpu
->name
;
3122 arm_initialize_isa (target
->isa
, arm_selected_cpu
->isa_bits
);
3124 /* If the user did not specify a processor, choose one for them. */
3127 const struct processors
* sel
;
3128 auto_sbitmap
sought_isa (isa_num_bits
);
3129 bitmap_clear (sought_isa
);
3130 auto_sbitmap
default_isa (isa_num_bits
);
3132 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
3133 gcc_assert (arm_selected_cpu
->name
);
3135 /* RWE: All of the selection logic below (to the end of this
3136 'if' clause) looks somewhat suspect. It appears to be mostly
3137 there to support forcing thumb support when the default CPU
3138 does not have thumb (somewhat dubious in terms of what the
3139 user might be expecting). I think it should be removed once
3140 support for the pre-thumb era cores is removed. */
3141 sel
= arm_selected_cpu
;
3142 arm_initialize_isa (default_isa
, sel
->isa_bits
);
3144 /* Now check to see if the user has specified any command line
3145 switches that require certain abilities from the cpu. */
3147 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3149 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3150 bitmap_set_bit (sought_isa
, isa_bit_mode32
);
3152 /* There are no ARM processors that support both APCS-26 and
3153 interworking. Therefore we forcibly remove MODE26 from
3154 from the isa features here (if it was set), so that the
3155 search below will always be able to find a compatible
3157 bitmap_clear_bit (default_isa
, isa_bit_mode26
);
3160 /* If there are such requirements and the default CPU does not
3161 satisfy them, we need to run over the complete list of
3162 cores looking for one that is satisfactory. */
3163 if (!bitmap_empty_p (sought_isa
)
3164 && !bitmap_subset_p (sought_isa
, default_isa
))
3166 auto_sbitmap
candidate_isa (isa_num_bits
);
3167 /* We're only interested in a CPU with at least the
3168 capabilities of the default CPU and the required
3169 additional features. */
3170 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3172 /* Try to locate a CPU type that supports all of the abilities
3173 of the default CPU, plus the extra abilities requested by
3175 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3177 arm_initialize_isa (candidate_isa
, sel
->isa_bits
);
3178 /* An exact match? */
3179 if (bitmap_equal_p (default_isa
, candidate_isa
))
3183 if (sel
->name
== NULL
)
3185 unsigned current_bit_count
= isa_num_bits
;
3186 const struct processors
* best_fit
= NULL
;
3188 /* Ideally we would like to issue an error message here
3189 saying that it was not possible to find a CPU compatible
3190 with the default CPU, but which also supports the command
3191 line options specified by the programmer, and so they
3192 ought to use the -mcpu=<name> command line option to
3193 override the default CPU type.
3195 If we cannot find a CPU that has exactly the
3196 characteristics of the default CPU and the given
3197 command line options we scan the array again looking
3198 for a best match. The best match must have at least
3199 the capabilities of the perfect match. */
3200 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3202 arm_initialize_isa (candidate_isa
, sel
->isa_bits
);
3204 if (bitmap_subset_p (default_isa
, candidate_isa
))
3208 bitmap_and_compl (candidate_isa
, candidate_isa
,
3210 count
= bitmap_popcount (candidate_isa
);
3212 if (count
< current_bit_count
)
3215 current_bit_count
= count
;
3219 gcc_assert (best_fit
);
3223 arm_selected_cpu
= sel
;
3226 /* Now we know the CPU, we can finally initialize the target
3228 target
->core_name
= arm_selected_cpu
->name
;
3229 arm_initialize_isa (target
->isa
, arm_selected_cpu
->isa_bits
);
3232 gcc_assert (arm_selected_cpu
);
3234 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
)
3236 arm_selected_fpu
= &all_fpus
[opts
->x_arm_fpu_index
];
3237 auto_sbitmap
fpu_bits (isa_num_bits
);
3239 arm_initialize_isa (fpu_bits
, arm_selected_fpu
->isa_bits
);
3240 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpubits
);
3241 bitmap_ior (target
->isa
, target
->isa
, fpu_bits
);
3243 else if (target
->core_name
== NULL
)
3244 /* To support this we need to be able to parse FPU feature options
3245 from the architecture string. */
3246 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3248 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3249 if (!arm_selected_tune
)
3250 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
3252 /* Finish initializing the target structure. */
3253 target
->arch_pp_name
= arm_selected_cpu
->arch
;
3254 target
->base_arch
= arm_selected_cpu
->base_arch
;
3255 target
->arch_core
= arm_selected_cpu
->core
;
3257 target
->tune_flags
= arm_selected_tune
->tune_flags
;
3258 target
->tune
= arm_selected_tune
->tune
;
3259 target
->tune_core
= arm_selected_tune
->core
;
3262 /* Fix up any incompatible options that the user has specified. */
3264 arm_option_override (void)
3266 static const enum isa_feature fpu_bitlist
[] = { ISA_ALL_FPU
, isa_nobit
};
3267 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3268 cl_target_option opts
;
3270 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3271 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3273 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3274 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3276 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3278 if (!global_options_set
.x_arm_fpu_index
)
3280 const char *target_fpu_name
;
3284 #ifdef FPUTYPE_DEFAULT
3285 target_fpu_name
= FPUTYPE_DEFAULT
;
3287 target_fpu_name
= "vfp";
3290 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &fpu_index
,
3293 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3296 cl_target_option_save (&opts
, &global_options
);
3297 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3300 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3301 SUBTARGET_OVERRIDE_OPTIONS
;
3304 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3305 arm_base_arch
= arm_active_target
.base_arch
;
3307 arm_tune
= arm_active_target
.tune_core
;
3308 tune_flags
= arm_active_target
.tune_flags
;
3309 current_tune
= arm_active_target
.tune
;
3311 /* TBD: Dwarf info for apcs frame is not handled yet. */
3312 if (TARGET_APCS_FRAME
)
3313 flag_shrink_wrap
= false;
3315 /* BPABI targets use linker tricks to allow interworking on cores
3316 without thumb support. */
3317 if (TARGET_INTERWORK
3319 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3321 warning (0, "target CPU does not support interworking" );
3322 target_flags
&= ~MASK_INTERWORK
;
3325 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3327 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3328 target_flags
|= MASK_APCS_FRAME
;
3331 if (TARGET_POKE_FUNCTION_NAME
)
3332 target_flags
|= MASK_APCS_FRAME
;
3334 if (TARGET_APCS_REENT
&& flag_pic
)
3335 error ("-fpic and -mapcs-reent are incompatible");
3337 if (TARGET_APCS_REENT
)
3338 warning (0, "APCS reentrant code not supported. Ignored");
3340 /* Initialize boolean versions of the architectural flags, for use
3341 in the arm.md file. */
3342 arm_arch3m
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv3m
);
3343 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv4
);
3344 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3345 arm_arch5
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv5
);
3346 arm_arch5e
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv5e
);
3347 arm_arch5te
= arm_arch5e
3348 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3349 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv6
);
3350 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv6k
);
3351 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3352 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3353 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv7
);
3354 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv7em
);
3355 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8
);
3356 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8_1
);
3357 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8_2
);
3358 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3359 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3360 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3361 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3362 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3363 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3364 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3365 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3366 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3367 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3368 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
3371 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3372 error ("selected fp16 options are incompatible");
3373 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3377 /* Set up some tuning parameters. */
3378 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3379 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3380 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3381 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3382 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3383 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3385 /* And finally, set up some quirks. */
3386 arm_arch_no_volatile_ce
3387 = bitmap_bit_p (arm_active_target
.isa
, isa_quirk_no_volatile_ce
);
3389 = arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
, isa_quirk_ARMv6kz
);
3391 /* V5 code we generate is completely interworking capable, so we turn off
3392 TARGET_INTERWORK here to avoid many tests later on. */
3394 /* XXX However, we must pass the right pre-processor defines to CPP
3395 or GLD can get confused. This is a hack. */
3396 if (TARGET_INTERWORK
)
3397 arm_cpp_interwork
= 1;
3400 target_flags
&= ~MASK_INTERWORK
;
3402 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3403 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3405 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3406 error ("iwmmxt abi requires an iwmmxt capable cpu");
3408 /* If soft-float is specified then don't use FPU. */
3409 if (TARGET_SOFT_FLOAT
)
3410 arm_fpu_attr
= FPU_NONE
;
3412 arm_fpu_attr
= FPU_VFP
;
3414 if (TARGET_AAPCS_BASED
)
3416 if (TARGET_CALLER_INTERWORKING
)
3417 error ("AAPCS does not support -mcaller-super-interworking");
3419 if (TARGET_CALLEE_INTERWORKING
)
3420 error ("AAPCS does not support -mcallee-super-interworking");
3423 /* __fp16 support currently assumes the core has ldrh. */
3424 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3425 sorry ("__fp16 and no ldrh");
3427 if (TARGET_AAPCS_BASED
)
3429 if (arm_abi
== ARM_ABI_IWMMXT
)
3430 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3431 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
3432 && TARGET_HARD_FLOAT
)
3434 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3435 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_VFPv2
))
3436 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3439 arm_pcs_default
= ARM_PCS_AAPCS
;
3443 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3444 sorry ("-mfloat-abi=hard and VFP");
3446 if (arm_abi
== ARM_ABI_APCS
)
3447 arm_pcs_default
= ARM_PCS_APCS
;
3449 arm_pcs_default
= ARM_PCS_ATPCS
;
3452 /* For arm2/3 there is no need to do any scheduling if we are doing
3453 software floating-point. */
3454 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3455 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3457 /* Use the cp15 method if it is available. */
3458 if (target_thread_pointer
== TP_AUTO
)
3460 if (arm_arch6k
&& !TARGET_THUMB1
)
3461 target_thread_pointer
= TP_CP15
;
3463 target_thread_pointer
= TP_SOFT
;
3466 /* Override the default structure alignment for AAPCS ABI. */
3467 if (!global_options_set
.x_arm_structure_size_boundary
)
3469 if (TARGET_AAPCS_BASED
)
3470 arm_structure_size_boundary
= 8;
3474 if (arm_structure_size_boundary
!= 8
3475 && arm_structure_size_boundary
!= 32
3476 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3478 if (ARM_DOUBLEWORD_ALIGN
)
3480 "structure size boundary can only be set to 8, 32 or 64");
3482 warning (0, "structure size boundary can only be set to 8 or 32");
3483 arm_structure_size_boundary
3484 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3488 if (TARGET_VXWORKS_RTP
)
3490 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3491 arm_pic_data_is_text_relative
= 0;
3494 && !arm_pic_data_is_text_relative
3495 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3496 /* When text & data segments don't have a fixed displacement, the
3497 intended use is with a single, read only, pic base register.
3498 Unless the user explicitly requested not to do that, set
3500 target_flags
|= MASK_SINGLE_PIC_BASE
;
3502 /* If stack checking is disabled, we can use r10 as the PIC register,
3503 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3504 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3506 if (TARGET_VXWORKS_RTP
)
3507 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3508 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3511 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3512 arm_pic_register
= 9;
3514 if (arm_pic_register_string
!= NULL
)
3516 int pic_register
= decode_reg_name (arm_pic_register_string
);
3519 warning (0, "-mpic-register= is useless without -fpic");
3521 /* Prevent the user from choosing an obviously stupid PIC register. */
3522 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3523 || pic_register
== HARD_FRAME_POINTER_REGNUM
3524 || pic_register
== STACK_POINTER_REGNUM
3525 || pic_register
>= PC_REGNUM
3526 || (TARGET_VXWORKS_RTP
3527 && (unsigned int) pic_register
!= arm_pic_register
))
3528 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3530 arm_pic_register
= pic_register
;
3533 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3534 if (fix_cm3_ldrd
== 2)
3536 if (bitmap_bit_p (arm_active_target
.isa
, isa_quirk_cm3_ldrd
))
3542 /* Hot/Cold partitioning is not currently supported, since we can't
3543 handle literal pool placement in that case. */
3544 if (flag_reorder_blocks_and_partition
)
3546 inform (input_location
,
3547 "-freorder-blocks-and-partition not supported on this architecture");
3548 flag_reorder_blocks_and_partition
= 0;
3549 flag_reorder_blocks
= 1;
3553 /* Hoisting PIC address calculations more aggressively provides a small,
3554 but measurable, size reduction for PIC code. Therefore, we decrease
3555 the bar for unrestricted expression hoisting to the cost of PIC address
3556 calculation, which is 2 instructions. */
3557 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3558 global_options
.x_param_values
,
3559 global_options_set
.x_param_values
);
3561 /* ARM EABI defaults to strict volatile bitfields. */
3562 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3563 && abi_version_at_least(2))
3564 flag_strict_volatile_bitfields
= 1;
3566 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3567 have deemed it beneficial (signified by setting
3568 prefetch.num_slots to 1 or more). */
3569 if (flag_prefetch_loop_arrays
< 0
3572 && current_tune
->prefetch
.num_slots
> 0)
3573 flag_prefetch_loop_arrays
= 1;
3575 /* Set up parameters to be used in prefetching algorithm. Do not
3576 override the defaults unless we are tuning for a core we have
3577 researched values for. */
3578 if (current_tune
->prefetch
.num_slots
> 0)
3579 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3580 current_tune
->prefetch
.num_slots
,
3581 global_options
.x_param_values
,
3582 global_options_set
.x_param_values
);
3583 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3584 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3585 current_tune
->prefetch
.l1_cache_line_size
,
3586 global_options
.x_param_values
,
3587 global_options_set
.x_param_values
);
3588 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3589 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3590 current_tune
->prefetch
.l1_cache_size
,
3591 global_options
.x_param_values
,
3592 global_options_set
.x_param_values
);
3594 /* Use Neon to perform 64-bits operations rather than core
3596 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3597 if (use_neon_for_64bits
== 1)
3598 prefer_neon_for_64bits
= true;
3600 /* Use the alternative scheduling-pressure algorithm by default. */
3601 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3602 global_options
.x_param_values
,
3603 global_options_set
.x_param_values
);
3605 /* Look through ready list and all of queue for instructions
3606 relevant for L2 auto-prefetcher. */
3607 int param_sched_autopref_queue_depth
;
3609 switch (current_tune
->sched_autopref
)
3611 case tune_params::SCHED_AUTOPREF_OFF
:
3612 param_sched_autopref_queue_depth
= -1;
3615 case tune_params::SCHED_AUTOPREF_RANK
:
3616 param_sched_autopref_queue_depth
= 0;
3619 case tune_params::SCHED_AUTOPREF_FULL
:
3620 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3627 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3628 param_sched_autopref_queue_depth
,
3629 global_options
.x_param_values
,
3630 global_options_set
.x_param_values
);
3632 /* Currently, for slow flash data, we just disable literal pools. We also
3633 disable it for pure-code. */
3634 if (target_slow_flash_data
|| target_pure_code
)
3635 arm_disable_literal_pool
= true;
3637 if (use_cmse
&& !arm_arch_cmse
)
3638 error ("target CPU does not support ARMv8-M Security Extensions");
3640 /* Disable scheduling fusion by default if it's not armv7 processor
3641 or doesn't prefer ldrd/strd. */
3642 if (flag_schedule_fusion
== 2
3643 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3644 flag_schedule_fusion
= 0;
3646 /* Need to remember initial options before they are overriden. */
3647 init_optimize
= build_optimization_node (&global_options
);
3649 arm_option_override_internal (&global_options
, &global_options_set
);
3650 arm_option_check_internal (&global_options
);
3651 arm_option_params_internal ();
3653 /* Create the default target_options structure. */
3654 target_option_default_node
= target_option_current_node
3655 = build_target_option_node (&global_options
);
3657 /* Register global variables with the garbage collector. */
3658 arm_add_gc_roots ();
3660 /* Init initial mode for testing. */
3661 thumb_flipper
= TARGET_THUMB
;
3665 arm_add_gc_roots (void)
3667 gcc_obstack_init(&minipool_obstack
);
3668 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3671 /* A table of known ARM exception types.
3672 For use with the interrupt function attribute. */
3676 const char *const arg
;
3677 const unsigned long return_value
;
3681 static const isr_attribute_arg isr_attribute_args
[] =
3683 { "IRQ", ARM_FT_ISR
},
3684 { "irq", ARM_FT_ISR
},
3685 { "FIQ", ARM_FT_FIQ
},
3686 { "fiq", ARM_FT_FIQ
},
3687 { "ABORT", ARM_FT_ISR
},
3688 { "abort", ARM_FT_ISR
},
3689 { "ABORT", ARM_FT_ISR
},
3690 { "abort", ARM_FT_ISR
},
3691 { "UNDEF", ARM_FT_EXCEPTION
},
3692 { "undef", ARM_FT_EXCEPTION
},
3693 { "SWI", ARM_FT_EXCEPTION
},
3694 { "swi", ARM_FT_EXCEPTION
},
3695 { NULL
, ARM_FT_NORMAL
}
3698 /* Returns the (interrupt) function type of the current
3699 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3701 static unsigned long
3702 arm_isr_value (tree argument
)
3704 const isr_attribute_arg
* ptr
;
3708 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3710 /* No argument - default to IRQ. */
3711 if (argument
== NULL_TREE
)
3714 /* Get the value of the argument. */
3715 if (TREE_VALUE (argument
) == NULL_TREE
3716 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3717 return ARM_FT_UNKNOWN
;
3719 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3721 /* Check it against the list of known arguments. */
3722 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3723 if (streq (arg
, ptr
->arg
))
3724 return ptr
->return_value
;
3726 /* An unrecognized interrupt type. */
3727 return ARM_FT_UNKNOWN
;
3730 /* Computes the type of the current function. */
3732 static unsigned long
3733 arm_compute_func_type (void)
3735 unsigned long type
= ARM_FT_UNKNOWN
;
3739 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3741 /* Decide if the current function is volatile. Such functions
3742 never return, and many memory cycles can be saved by not storing
3743 register values that will never be needed again. This optimization
3744 was added to speed up context switching in a kernel application. */
3746 && (TREE_NOTHROW (current_function_decl
)
3747 || !(flag_unwind_tables
3749 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3750 && TREE_THIS_VOLATILE (current_function_decl
))
3751 type
|= ARM_FT_VOLATILE
;
3753 if (cfun
->static_chain_decl
!= NULL
)
3754 type
|= ARM_FT_NESTED
;
3756 attr
= DECL_ATTRIBUTES (current_function_decl
);
3758 a
= lookup_attribute ("naked", attr
);
3760 type
|= ARM_FT_NAKED
;
3762 a
= lookup_attribute ("isr", attr
);
3764 a
= lookup_attribute ("interrupt", attr
);
3767 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3769 type
|= arm_isr_value (TREE_VALUE (a
));
3771 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
3772 type
|= ARM_FT_CMSE_ENTRY
;
3777 /* Returns the type of the current function. */
3780 arm_current_func_type (void)
3782 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3783 cfun
->machine
->func_type
= arm_compute_func_type ();
3785 return cfun
->machine
->func_type
;
3789 arm_allocate_stack_slots_for_args (void)
3791 /* Naked functions should not allocate stack slots for arguments. */
3792 return !IS_NAKED (arm_current_func_type ());
3796 arm_warn_func_return (tree decl
)
3798 /* Naked functions are implemented entirely in assembly, including the
3799 return sequence, so suppress warnings about this. */
3800 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3804 /* Output assembler code for a block containing the constant parts
3805 of a trampoline, leaving space for the variable parts.
3807 On the ARM, (if r8 is the static chain regnum, and remembering that
3808 referencing pc adds an offset of 8) the trampoline looks like:
3811 .word static chain value
3812 .word function's address
3813 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3816 arm_asm_trampoline_template (FILE *f
)
3818 fprintf (f
, "\t.syntax unified\n");
3822 fprintf (f
, "\t.arm\n");
3823 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3824 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3826 else if (TARGET_THUMB2
)
3828 fprintf (f
, "\t.thumb\n");
3829 /* The Thumb-2 trampoline is similar to the arm implementation.
3830 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3831 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3832 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3833 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3837 ASM_OUTPUT_ALIGN (f
, 2);
3838 fprintf (f
, "\t.code\t16\n");
3839 fprintf (f
, ".Ltrampoline_start:\n");
3840 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3841 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3842 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3843 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3844 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3845 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3847 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3848 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3851 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3854 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3856 rtx fnaddr
, mem
, a_tramp
;
3858 emit_block_move (m_tramp
, assemble_trampoline_template (),
3859 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3861 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3862 emit_move_insn (mem
, chain_value
);
3864 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3865 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3866 emit_move_insn (mem
, fnaddr
);
3868 a_tramp
= XEXP (m_tramp
, 0);
3869 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3870 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3871 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3874 /* Thumb trampolines should be entered in thumb mode, so set
3875 the bottom bit of the address. */
3878 arm_trampoline_adjust_address (rtx addr
)
3881 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3882 NULL
, 0, OPTAB_LIB_WIDEN
);
3886 /* Return 1 if it is possible to return using a single instruction.
3887 If SIBLING is non-null, this is a test for a return before a sibling
3888 call. SIBLING is the call insn, so we can examine its register usage. */
3891 use_return_insn (int iscond
, rtx sibling
)
3894 unsigned int func_type
;
3895 unsigned long saved_int_regs
;
3896 unsigned HOST_WIDE_INT stack_adjust
;
3897 arm_stack_offsets
*offsets
;
3899 /* Never use a return instruction before reload has run. */
3900 if (!reload_completed
)
3903 func_type
= arm_current_func_type ();
3905 /* Naked, volatile and stack alignment functions need special
3907 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3910 /* So do interrupt functions that use the frame pointer and Thumb
3911 interrupt functions. */
3912 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3915 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3916 && !optimize_function_for_size_p (cfun
))
3919 offsets
= arm_get_frame_offsets ();
3920 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3922 /* As do variadic functions. */
3923 if (crtl
->args
.pretend_args_size
3924 || cfun
->machine
->uses_anonymous_args
3925 /* Or if the function calls __builtin_eh_return () */
3926 || crtl
->calls_eh_return
3927 /* Or if the function calls alloca */
3928 || cfun
->calls_alloca
3929 /* Or if there is a stack adjustment. However, if the stack pointer
3930 is saved on the stack, we can use a pre-incrementing stack load. */
3931 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3932 && stack_adjust
== 4))
3933 /* Or if the static chain register was saved above the frame, under the
3934 assumption that the stack pointer isn't saved on the stack. */
3935 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
3936 && arm_compute_static_chain_stack_bytes() != 0))
3939 saved_int_regs
= offsets
->saved_regs_mask
;
3941 /* Unfortunately, the insn
3943 ldmib sp, {..., sp, ...}
3945 triggers a bug on most SA-110 based devices, such that the stack
3946 pointer won't be correctly restored if the instruction takes a
3947 page fault. We work around this problem by popping r3 along with
3948 the other registers, since that is never slower than executing
3949 another instruction.
3951 We test for !arm_arch5 here, because code for any architecture
3952 less than this could potentially be run on one of the buggy
3954 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3956 /* Validate that r3 is a call-clobbered register (always true in
3957 the default abi) ... */
3958 if (!call_used_regs
[3])
3961 /* ... that it isn't being used for a return value ... */
3962 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3965 /* ... or for a tail-call argument ... */
3968 gcc_assert (CALL_P (sibling
));
3970 if (find_regno_fusage (sibling
, USE
, 3))
3974 /* ... and that there are no call-saved registers in r0-r2
3975 (always true in the default ABI). */
3976 if (saved_int_regs
& 0x7)
3980 /* Can't be done if interworking with Thumb, and any registers have been
3982 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3985 /* On StrongARM, conditional returns are expensive if they aren't
3986 taken and multiple registers have been stacked. */
3987 if (iscond
&& arm_tune_strongarm
)
3989 /* Conditional return when just the LR is stored is a simple
3990 conditional-load instruction, that's not expensive. */
3991 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3995 && arm_pic_register
!= INVALID_REGNUM
3996 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4000 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4001 several instructions if anything needs to be popped. */
4002 if (saved_int_regs
&& IS_CMSE_ENTRY (func_type
))
4005 /* If there are saved registers but the LR isn't saved, then we need
4006 two instructions for the return. */
4007 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4010 /* Can't be done if any of the VFP regs are pushed,
4011 since this also requires an insn. */
4012 if (TARGET_HARD_FLOAT
)
4013 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4014 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
4017 if (TARGET_REALLY_IWMMXT
)
4018 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4019 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
4025 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4026 shrink-wrapping if possible. This is the case if we need to emit a
4027 prologue, which we can test by looking at the offsets. */
4029 use_simple_return_p (void)
4031 arm_stack_offsets
*offsets
;
4033 offsets
= arm_get_frame_offsets ();
4034 return offsets
->outgoing_args
!= 0;
4037 /* Return TRUE if int I is a valid immediate ARM constant. */
4040 const_ok_for_arm (HOST_WIDE_INT i
)
4044 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4045 be all zero, or all one. */
4046 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4047 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4048 != ((~(unsigned HOST_WIDE_INT
) 0)
4049 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
4052 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4054 /* Fast return for 0 and small values. We must do this for zero, since
4055 the code below can't handle that one case. */
4056 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4059 /* Get the number of trailing zeros. */
4060 lowbit
= ffs((int) i
) - 1;
4062 /* Only even shifts are allowed in ARM mode so round down to the
4063 nearest even number. */
4067 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4072 /* Allow rotated constants in ARM mode. */
4074 && ((i
& ~0xc000003f) == 0
4075 || (i
& ~0xf000000f) == 0
4076 || (i
& ~0xfc000003) == 0))
4083 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4086 if (i
== v
|| i
== (v
| (v
<< 8)))
4089 /* Allow repeated pattern 0xXY00XY00. */
4099 /* Return true if I is a valid constant for the operation CODE. */
4101 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
4103 if (const_ok_for_arm (i
))
4109 /* See if we can use movw. */
4110 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4113 /* Otherwise, try mvn. */
4114 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4117 /* See if we can use addw or subw. */
4119 && ((i
& 0xfffff000) == 0
4120 || ((-i
) & 0xfffff000) == 0))
4141 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4143 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4149 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4153 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4160 /* Return true if I is a valid di mode constant for the operation CODE. */
4162 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4164 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4165 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4166 rtx hi
= GEN_INT (hi_val
);
4167 rtx lo
= GEN_INT (lo_val
);
4177 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4178 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4180 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4187 /* Emit a sequence of insns to handle a large constant.
4188 CODE is the code of the operation required, it can be any of SET, PLUS,
4189 IOR, AND, XOR, MINUS;
4190 MODE is the mode in which the operation is being performed;
4191 VAL is the integer to operate on;
4192 SOURCE is the other operand (a register, or a null-pointer for SET);
4193 SUBTARGETS means it is safe to create scratch registers if that will
4194 either produce a simpler sequence, or we will want to cse the values.
4195 Return value is the number of insns emitted. */
4197 /* ??? Tweak this for thumb2. */
4199 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4200 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4204 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4205 cond
= COND_EXEC_TEST (PATTERN (insn
));
4209 if (subtargets
|| code
== SET
4210 || (REG_P (target
) && REG_P (source
)
4211 && REGNO (target
) != REGNO (source
)))
4213 /* After arm_reorg has been called, we can't fix up expensive
4214 constants by pushing them into memory so we must synthesize
4215 them in-line, regardless of the cost. This is only likely to
4216 be more costly on chips that have load delay slots and we are
4217 compiling without running the scheduler (so no splitting
4218 occurred before the final instruction emission).
4220 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4222 if (!cfun
->machine
->after_arm_reorg
4224 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4226 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4231 /* Currently SET is the only monadic value for CODE, all
4232 the rest are diadic. */
4233 if (TARGET_USE_MOVT
)
4234 arm_emit_movpair (target
, GEN_INT (val
));
4236 emit_set_insn (target
, GEN_INT (val
));
4242 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4244 if (TARGET_USE_MOVT
)
4245 arm_emit_movpair (temp
, GEN_INT (val
));
4247 emit_set_insn (temp
, GEN_INT (val
));
4249 /* For MINUS, the value is subtracted from, since we never
4250 have subtraction of a constant. */
4252 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4254 emit_set_insn (target
,
4255 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4261 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4265 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4266 ARM/THUMB2 immediates, and add up to VAL.
4267 Thr function return value gives the number of insns required. */
4269 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4270 struct four_ints
*return_sequence
)
4272 int best_consecutive_zeros
= 0;
4276 struct four_ints tmp_sequence
;
4278 /* If we aren't targeting ARM, the best place to start is always at
4279 the bottom, otherwise look more closely. */
4282 for (i
= 0; i
< 32; i
+= 2)
4284 int consecutive_zeros
= 0;
4286 if (!(val
& (3 << i
)))
4288 while ((i
< 32) && !(val
& (3 << i
)))
4290 consecutive_zeros
+= 2;
4293 if (consecutive_zeros
> best_consecutive_zeros
)
4295 best_consecutive_zeros
= consecutive_zeros
;
4296 best_start
= i
- consecutive_zeros
;
4303 /* So long as it won't require any more insns to do so, it's
4304 desirable to emit a small constant (in bits 0...9) in the last
4305 insn. This way there is more chance that it can be combined with
4306 a later addressing insn to form a pre-indexed load or store
4307 operation. Consider:
4309 *((volatile int *)0xe0000100) = 1;
4310 *((volatile int *)0xe0000110) = 2;
4312 We want this to wind up as:
4316 str rB, [rA, #0x100]
4318 str rB, [rA, #0x110]
4320 rather than having to synthesize both large constants from scratch.
4322 Therefore, we calculate how many insns would be required to emit
4323 the constant starting from `best_start', and also starting from
4324 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4325 yield a shorter sequence, we may as well use zero. */
4326 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4328 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4330 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4331 if (insns2
<= insns1
)
4333 *return_sequence
= tmp_sequence
;
4341 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4343 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4344 struct four_ints
*return_sequence
, int i
)
4346 int remainder
= val
& 0xffffffff;
4349 /* Try and find a way of doing the job in either two or three
4352 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4353 location. We start at position I. This may be the MSB, or
4354 optimial_immediate_sequence may have positioned it at the largest block
4355 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4356 wrapping around to the top of the word when we drop off the bottom.
4357 In the worst case this code should produce no more than four insns.
4359 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4360 constants, shifted to any arbitrary location. We should always start
4365 unsigned int b1
, b2
, b3
, b4
;
4366 unsigned HOST_WIDE_INT result
;
4369 gcc_assert (insns
< 4);
4374 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4375 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4378 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4379 /* We can use addw/subw for the last 12 bits. */
4383 /* Use an 8-bit shifted/rotated immediate. */
4387 result
= remainder
& ((0x0ff << end
)
4388 | ((i
< end
) ? (0xff >> (32 - end
))
4395 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4396 arbitrary shifts. */
4397 i
-= TARGET_ARM
? 2 : 1;
4401 /* Next, see if we can do a better job with a thumb2 replicated
4404 We do it this way around to catch the cases like 0x01F001E0 where
4405 two 8-bit immediates would work, but a replicated constant would
4408 TODO: 16-bit constants that don't clear all the bits, but still win.
4409 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4412 b1
= (remainder
& 0xff000000) >> 24;
4413 b2
= (remainder
& 0x00ff0000) >> 16;
4414 b3
= (remainder
& 0x0000ff00) >> 8;
4415 b4
= remainder
& 0xff;
4419 /* The 8-bit immediate already found clears b1 (and maybe b2),
4420 but must leave b3 and b4 alone. */
4422 /* First try to find a 32-bit replicated constant that clears
4423 almost everything. We can assume that we can't do it in one,
4424 or else we wouldn't be here. */
4425 unsigned int tmp
= b1
& b2
& b3
& b4
;
4426 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4428 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4429 + (tmp
== b3
) + (tmp
== b4
);
4431 && (matching_bytes
>= 3
4432 || (matching_bytes
== 2
4433 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4435 /* At least 3 of the bytes match, and the fourth has at
4436 least as many bits set, or two of the bytes match
4437 and it will only require one more insn to finish. */
4445 /* Second, try to find a 16-bit replicated constant that can
4446 leave three of the bytes clear. If b2 or b4 is already
4447 zero, then we can. If the 8-bit from above would not
4448 clear b2 anyway, then we still win. */
4449 else if (b1
== b3
&& (!b2
|| !b4
4450 || (remainder
& 0x00ff0000 & ~result
)))
4452 result
= remainder
& 0xff00ff00;
4458 /* The 8-bit immediate already found clears b2 (and maybe b3)
4459 and we don't get here unless b1 is alredy clear, but it will
4460 leave b4 unchanged. */
4462 /* If we can clear b2 and b4 at once, then we win, since the
4463 8-bits couldn't possibly reach that far. */
4466 result
= remainder
& 0x00ff00ff;
4472 return_sequence
->i
[insns
++] = result
;
4473 remainder
&= ~result
;
4475 if (code
== SET
|| code
== MINUS
)
4483 /* Emit an instruction with the indicated PATTERN. If COND is
4484 non-NULL, conditionalize the execution of the instruction on COND
4488 emit_constant_insn (rtx cond
, rtx pattern
)
4491 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4492 emit_insn (pattern
);
4495 /* As above, but extra parameter GENERATE which, if clear, suppresses
4499 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4500 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4501 int subtargets
, int generate
)
4505 int final_invert
= 0;
4507 int set_sign_bit_copies
= 0;
4508 int clear_sign_bit_copies
= 0;
4509 int clear_zero_bit_copies
= 0;
4510 int set_zero_bit_copies
= 0;
4511 int insns
= 0, neg_insns
, inv_insns
;
4512 unsigned HOST_WIDE_INT temp1
, temp2
;
4513 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4514 struct four_ints
*immediates
;
4515 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4517 /* Find out which operations are safe for a given CODE. Also do a quick
4518 check for degenerate cases; these can occur when DImode operations
4531 if (remainder
== 0xffffffff)
4534 emit_constant_insn (cond
,
4535 gen_rtx_SET (target
,
4536 GEN_INT (ARM_SIGN_EXTEND (val
))));
4542 if (reload_completed
&& rtx_equal_p (target
, source
))
4546 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4555 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4558 if (remainder
== 0xffffffff)
4560 if (reload_completed
&& rtx_equal_p (target
, source
))
4563 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4572 if (reload_completed
&& rtx_equal_p (target
, source
))
4575 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4579 if (remainder
== 0xffffffff)
4582 emit_constant_insn (cond
,
4583 gen_rtx_SET (target
,
4584 gen_rtx_NOT (mode
, source
)));
4591 /* We treat MINUS as (val - source), since (source - val) is always
4592 passed as (source + (-val)). */
4596 emit_constant_insn (cond
,
4597 gen_rtx_SET (target
,
4598 gen_rtx_NEG (mode
, source
)));
4601 if (const_ok_for_arm (val
))
4604 emit_constant_insn (cond
,
4605 gen_rtx_SET (target
,
4606 gen_rtx_MINUS (mode
, GEN_INT (val
),
4617 /* If we can do it in one insn get out quickly. */
4618 if (const_ok_for_op (val
, code
))
4621 emit_constant_insn (cond
,
4622 gen_rtx_SET (target
,
4624 ? gen_rtx_fmt_ee (code
, mode
, source
,
4630 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4632 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4633 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4637 if (mode
== SImode
&& i
== 16)
4638 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4640 emit_constant_insn (cond
,
4641 gen_zero_extendhisi2
4642 (target
, gen_lowpart (HImode
, source
)));
4644 /* Extz only supports SImode, but we can coerce the operands
4646 emit_constant_insn (cond
,
4647 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4648 gen_lowpart (SImode
, source
),
4649 GEN_INT (i
), const0_rtx
));
4655 /* Calculate a few attributes that may be useful for specific
4657 /* Count number of leading zeros. */
4658 for (i
= 31; i
>= 0; i
--)
4660 if ((remainder
& (1 << i
)) == 0)
4661 clear_sign_bit_copies
++;
4666 /* Count number of leading 1's. */
4667 for (i
= 31; i
>= 0; i
--)
4669 if ((remainder
& (1 << i
)) != 0)
4670 set_sign_bit_copies
++;
4675 /* Count number of trailing zero's. */
4676 for (i
= 0; i
<= 31; i
++)
4678 if ((remainder
& (1 << i
)) == 0)
4679 clear_zero_bit_copies
++;
4684 /* Count number of trailing 1's. */
4685 for (i
= 0; i
<= 31; i
++)
4687 if ((remainder
& (1 << i
)) != 0)
4688 set_zero_bit_copies
++;
4696 /* See if we can do this by sign_extending a constant that is known
4697 to be negative. This is a good, way of doing it, since the shift
4698 may well merge into a subsequent insn. */
4699 if (set_sign_bit_copies
> 1)
4701 if (const_ok_for_arm
4702 (temp1
= ARM_SIGN_EXTEND (remainder
4703 << (set_sign_bit_copies
- 1))))
4707 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4708 emit_constant_insn (cond
,
4709 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4710 emit_constant_insn (cond
,
4711 gen_ashrsi3 (target
, new_src
,
4712 GEN_INT (set_sign_bit_copies
- 1)));
4716 /* For an inverted constant, we will need to set the low bits,
4717 these will be shifted out of harm's way. */
4718 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4719 if (const_ok_for_arm (~temp1
))
4723 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4724 emit_constant_insn (cond
,
4725 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4726 emit_constant_insn (cond
,
4727 gen_ashrsi3 (target
, new_src
,
4728 GEN_INT (set_sign_bit_copies
- 1)));
4734 /* See if we can calculate the value as the difference between two
4735 valid immediates. */
4736 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4738 int topshift
= clear_sign_bit_copies
& ~1;
4740 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4741 & (0xff000000 >> topshift
));
4743 /* If temp1 is zero, then that means the 9 most significant
4744 bits of remainder were 1 and we've caused it to overflow.
4745 When topshift is 0 we don't need to do anything since we
4746 can borrow from 'bit 32'. */
4747 if (temp1
== 0 && topshift
!= 0)
4748 temp1
= 0x80000000 >> (topshift
- 1);
4750 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4752 if (const_ok_for_arm (temp2
))
4756 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4757 emit_constant_insn (cond
,
4758 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4759 emit_constant_insn (cond
,
4760 gen_addsi3 (target
, new_src
,
4768 /* See if we can generate this by setting the bottom (or the top)
4769 16 bits, and then shifting these into the other half of the
4770 word. We only look for the simplest cases, to do more would cost
4771 too much. Be careful, however, not to generate this when the
4772 alternative would take fewer insns. */
4773 if (val
& 0xffff0000)
4775 temp1
= remainder
& 0xffff0000;
4776 temp2
= remainder
& 0x0000ffff;
4778 /* Overlaps outside this range are best done using other methods. */
4779 for (i
= 9; i
< 24; i
++)
4781 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4782 && !const_ok_for_arm (temp2
))
4784 rtx new_src
= (subtargets
4785 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4787 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4788 source
, subtargets
, generate
);
4796 gen_rtx_ASHIFT (mode
, source
,
4803 /* Don't duplicate cases already considered. */
4804 for (i
= 17; i
< 24; i
++)
4806 if (((temp1
| (temp1
>> i
)) == remainder
)
4807 && !const_ok_for_arm (temp1
))
4809 rtx new_src
= (subtargets
4810 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4812 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4813 source
, subtargets
, generate
);
4818 gen_rtx_SET (target
,
4821 gen_rtx_LSHIFTRT (mode
, source
,
4832 /* If we have IOR or XOR, and the constant can be loaded in a
4833 single instruction, and we can find a temporary to put it in,
4834 then this can be done in two instructions instead of 3-4. */
4836 /* TARGET can't be NULL if SUBTARGETS is 0 */
4837 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4839 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4843 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4845 emit_constant_insn (cond
,
4846 gen_rtx_SET (sub
, GEN_INT (val
)));
4847 emit_constant_insn (cond
,
4848 gen_rtx_SET (target
,
4849 gen_rtx_fmt_ee (code
, mode
,
4860 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4861 and the remainder 0s for e.g. 0xfff00000)
4862 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4864 This can be done in 2 instructions by using shifts with mov or mvn.
4869 mvn r0, r0, lsr #12 */
4870 if (set_sign_bit_copies
> 8
4871 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4875 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4876 rtx shift
= GEN_INT (set_sign_bit_copies
);
4882 gen_rtx_ASHIFT (mode
,
4887 gen_rtx_SET (target
,
4889 gen_rtx_LSHIFTRT (mode
, sub
,
4896 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4898 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4900 For eg. r0 = r0 | 0xfff
4905 if (set_zero_bit_copies
> 8
4906 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4910 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4911 rtx shift
= GEN_INT (set_zero_bit_copies
);
4917 gen_rtx_LSHIFTRT (mode
,
4922 gen_rtx_SET (target
,
4924 gen_rtx_ASHIFT (mode
, sub
,
4930 /* This will never be reached for Thumb2 because orn is a valid
4931 instruction. This is for Thumb1 and the ARM 32 bit cases.
4933 x = y | constant (such that ~constant is a valid constant)
4935 x = ~(~y & ~constant).
4937 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4941 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4942 emit_constant_insn (cond
,
4944 gen_rtx_NOT (mode
, source
)));
4947 sub
= gen_reg_rtx (mode
);
4948 emit_constant_insn (cond
,
4950 gen_rtx_AND (mode
, source
,
4952 emit_constant_insn (cond
,
4953 gen_rtx_SET (target
,
4954 gen_rtx_NOT (mode
, sub
)));
4961 /* See if two shifts will do 2 or more insn's worth of work. */
4962 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4964 HOST_WIDE_INT shift_mask
= ((0xffffffff
4965 << (32 - clear_sign_bit_copies
))
4968 if ((remainder
| shift_mask
) != 0xffffffff)
4970 HOST_WIDE_INT new_val
4971 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4975 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4976 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4977 new_src
, source
, subtargets
, 1);
4982 rtx targ
= subtargets
? NULL_RTX
: target
;
4983 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4984 targ
, source
, subtargets
, 0);
4990 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4991 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4993 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4994 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5000 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5002 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5004 if ((remainder
| shift_mask
) != 0xffffffff)
5006 HOST_WIDE_INT new_val
5007 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5010 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5012 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5013 new_src
, source
, subtargets
, 1);
5018 rtx targ
= subtargets
? NULL_RTX
: target
;
5020 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5021 targ
, source
, subtargets
, 0);
5027 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5028 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5030 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5031 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5043 /* Calculate what the instruction sequences would be if we generated it
5044 normally, negated, or inverted. */
5046 /* AND cannot be split into multiple insns, so invert and use BIC. */
5049 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5052 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5057 if (can_invert
|| final_invert
)
5058 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5063 immediates
= &pos_immediates
;
5065 /* Is the negated immediate sequence more efficient? */
5066 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5069 immediates
= &neg_immediates
;
5074 /* Is the inverted immediate sequence more efficient?
5075 We must allow for an extra NOT instruction for XOR operations, although
5076 there is some chance that the final 'mvn' will get optimized later. */
5077 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5080 immediates
= &inv_immediates
;
5088 /* Now output the chosen sequence as instructions. */
5091 for (i
= 0; i
< insns
; i
++)
5093 rtx new_src
, temp1_rtx
;
5095 temp1
= immediates
->i
[i
];
5097 if (code
== SET
|| code
== MINUS
)
5098 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5099 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5100 new_src
= gen_reg_rtx (mode
);
5106 else if (can_negate
)
5109 temp1
= trunc_int_for_mode (temp1
, mode
);
5110 temp1_rtx
= GEN_INT (temp1
);
5114 else if (code
== MINUS
)
5115 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5117 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5119 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5124 can_negate
= can_invert
;
5128 else if (code
== MINUS
)
5136 emit_constant_insn (cond
, gen_rtx_SET (target
,
5137 gen_rtx_NOT (mode
, source
)));
5144 /* Canonicalize a comparison so that we are more likely to recognize it.
5145 This can be done for a few constant compares, where we can make the
5146 immediate value easier to load. */
5149 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5150 bool op0_preserve_value
)
5153 unsigned HOST_WIDE_INT i
, maxval
;
5155 mode
= GET_MODE (*op0
);
5156 if (mode
== VOIDmode
)
5157 mode
= GET_MODE (*op1
);
5159 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5161 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5162 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5163 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5164 for GTU/LEU in Thumb mode. */
5168 if (*code
== GT
|| *code
== LE
5169 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5171 /* Missing comparison. First try to use an available
5173 if (CONST_INT_P (*op1
))
5181 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5183 *op1
= GEN_INT (i
+ 1);
5184 *code
= *code
== GT
? GE
: LT
;
5190 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5191 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5193 *op1
= GEN_INT (i
+ 1);
5194 *code
= *code
== GTU
? GEU
: LTU
;
5203 /* If that did not work, reverse the condition. */
5204 if (!op0_preserve_value
)
5206 std::swap (*op0
, *op1
);
5207 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5213 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5214 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5215 to facilitate possible combining with a cmp into 'ands'. */
5217 && GET_CODE (*op0
) == ZERO_EXTEND
5218 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5219 && GET_MODE (XEXP (*op0
, 0)) == QImode
5220 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5221 && subreg_lowpart_p (XEXP (*op0
, 0))
5222 && *op1
== const0_rtx
)
5223 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5226 /* Comparisons smaller than DImode. Only adjust comparisons against
5227 an out-of-range constant. */
5228 if (!CONST_INT_P (*op1
)
5229 || const_ok_for_arm (INTVAL (*op1
))
5230 || const_ok_for_arm (- INTVAL (*op1
)))
5244 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5246 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5247 *code
= *code
== GT
? GE
: LT
;
5255 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5257 *op1
= GEN_INT (i
- 1);
5258 *code
= *code
== GE
? GT
: LE
;
5265 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5266 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5268 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5269 *code
= *code
== GTU
? GEU
: LTU
;
5277 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5279 *op1
= GEN_INT (i
- 1);
5280 *code
= *code
== GEU
? GTU
: LEU
;
5291 /* Define how to find the value returned by a function. */
5294 arm_function_value(const_tree type
, const_tree func
,
5295 bool outgoing ATTRIBUTE_UNUSED
)
5298 int unsignedp ATTRIBUTE_UNUSED
;
5299 rtx r ATTRIBUTE_UNUSED
;
5301 mode
= TYPE_MODE (type
);
5303 if (TARGET_AAPCS_BASED
)
5304 return aapcs_allocate_return_reg (mode
, type
, func
);
5306 /* Promote integer types. */
5307 if (INTEGRAL_TYPE_P (type
))
5308 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5310 /* Promotes small structs returned in a register to full-word size
5311 for big-endian AAPCS. */
5312 if (arm_return_in_msb (type
))
5314 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5315 if (size
% UNITS_PER_WORD
!= 0)
5317 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5318 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5322 return arm_libcall_value_1 (mode
);
5325 /* libcall hashtable helpers. */
5327 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5329 static inline hashval_t
hash (const rtx_def
*);
5330 static inline bool equal (const rtx_def
*, const rtx_def
*);
5331 static inline void remove (rtx_def
*);
5335 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5337 return rtx_equal_p (p1
, p2
);
5341 libcall_hasher::hash (const rtx_def
*p1
)
5343 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5346 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5349 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5351 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5355 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5357 static bool init_done
= false;
5358 static libcall_table_type
*libcall_htab
= NULL
;
5364 libcall_htab
= new libcall_table_type (31);
5365 add_libcall (libcall_htab
,
5366 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5367 add_libcall (libcall_htab
,
5368 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5369 add_libcall (libcall_htab
,
5370 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5371 add_libcall (libcall_htab
,
5372 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5374 add_libcall (libcall_htab
,
5375 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5376 add_libcall (libcall_htab
,
5377 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5378 add_libcall (libcall_htab
,
5379 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5380 add_libcall (libcall_htab
,
5381 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5383 add_libcall (libcall_htab
,
5384 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5385 add_libcall (libcall_htab
,
5386 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5387 add_libcall (libcall_htab
,
5388 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5389 add_libcall (libcall_htab
,
5390 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5391 add_libcall (libcall_htab
,
5392 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5393 add_libcall (libcall_htab
,
5394 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5395 add_libcall (libcall_htab
,
5396 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5397 add_libcall (libcall_htab
,
5398 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5400 /* Values from double-precision helper functions are returned in core
5401 registers if the selected core only supports single-precision
5402 arithmetic, even if we are using the hard-float ABI. The same is
5403 true for single-precision helpers, but we will never be using the
5404 hard-float ABI on a CPU which doesn't support single-precision
5405 operations in hardware. */
5406 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5407 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5408 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5409 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5410 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5411 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5412 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5413 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5414 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5415 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5416 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5417 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5419 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5421 add_libcall (libcall_htab
,
5422 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5425 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5429 arm_libcall_value_1 (machine_mode mode
)
5431 if (TARGET_AAPCS_BASED
)
5432 return aapcs_libcall_value (mode
);
5433 else if (TARGET_IWMMXT_ABI
5434 && arm_vector_mode_supported_p (mode
))
5435 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5437 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5440 /* Define how to find the value returned by a library function
5441 assuming the value has mode MODE. */
5444 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5446 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5447 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5449 /* The following libcalls return their result in integer registers,
5450 even though they return a floating point value. */
5451 if (arm_libcall_uses_aapcs_base (libcall
))
5452 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5456 return arm_libcall_value_1 (mode
);
5459 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5462 arm_function_value_regno_p (const unsigned int regno
)
5464 if (regno
== ARG_REGISTER (1)
5466 && TARGET_AAPCS_BASED
5467 && TARGET_HARD_FLOAT
5468 && regno
== FIRST_VFP_REGNUM
)
5469 || (TARGET_IWMMXT_ABI
5470 && regno
== FIRST_IWMMXT_REGNUM
))
5476 /* Determine the amount of memory needed to store the possible return
5477 registers of an untyped call. */
5479 arm_apply_result_size (void)
5485 if (TARGET_HARD_FLOAT_ABI
)
5487 if (TARGET_IWMMXT_ABI
)
5494 /* Decide whether TYPE should be returned in memory (true)
5495 or in a register (false). FNTYPE is the type of the function making
5498 arm_return_in_memory (const_tree type
, const_tree fntype
)
5502 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5504 if (TARGET_AAPCS_BASED
)
5506 /* Simple, non-aggregate types (ie not including vectors and
5507 complex) are always returned in a register (or registers).
5508 We don't care about which register here, so we can short-cut
5509 some of the detail. */
5510 if (!AGGREGATE_TYPE_P (type
)
5511 && TREE_CODE (type
) != VECTOR_TYPE
5512 && TREE_CODE (type
) != COMPLEX_TYPE
)
5515 /* Any return value that is no larger than one word can be
5517 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5520 /* Check any available co-processors to see if they accept the
5521 type as a register candidate (VFP, for example, can return
5522 some aggregates in consecutive registers). These aren't
5523 available if the call is variadic. */
5524 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5527 /* Vector values should be returned using ARM registers, not
5528 memory (unless they're over 16 bytes, which will break since
5529 we only have four call-clobbered registers to play with). */
5530 if (TREE_CODE (type
) == VECTOR_TYPE
)
5531 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5533 /* The rest go in memory. */
5537 if (TREE_CODE (type
) == VECTOR_TYPE
)
5538 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5540 if (!AGGREGATE_TYPE_P (type
) &&
5541 (TREE_CODE (type
) != VECTOR_TYPE
))
5542 /* All simple types are returned in registers. */
5545 if (arm_abi
!= ARM_ABI_APCS
)
5547 /* ATPCS and later return aggregate types in memory only if they are
5548 larger than a word (or are variable size). */
5549 return (size
< 0 || size
> UNITS_PER_WORD
);
5552 /* For the arm-wince targets we choose to be compatible with Microsoft's
5553 ARM and Thumb compilers, which always return aggregates in memory. */
5555 /* All structures/unions bigger than one word are returned in memory.
5556 Also catch the case where int_size_in_bytes returns -1. In this case
5557 the aggregate is either huge or of variable size, and in either case
5558 we will want to return it via memory and not in a register. */
5559 if (size
< 0 || size
> UNITS_PER_WORD
)
5562 if (TREE_CODE (type
) == RECORD_TYPE
)
5566 /* For a struct the APCS says that we only return in a register
5567 if the type is 'integer like' and every addressable element
5568 has an offset of zero. For practical purposes this means
5569 that the structure can have at most one non bit-field element
5570 and that this element must be the first one in the structure. */
5572 /* Find the first field, ignoring non FIELD_DECL things which will
5573 have been created by C++. */
5574 for (field
= TYPE_FIELDS (type
);
5575 field
&& TREE_CODE (field
) != FIELD_DECL
;
5576 field
= DECL_CHAIN (field
))
5580 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5582 /* Check that the first field is valid for returning in a register. */
5584 /* ... Floats are not allowed */
5585 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5588 /* ... Aggregates that are not themselves valid for returning in
5589 a register are not allowed. */
5590 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5593 /* Now check the remaining fields, if any. Only bitfields are allowed,
5594 since they are not addressable. */
5595 for (field
= DECL_CHAIN (field
);
5597 field
= DECL_CHAIN (field
))
5599 if (TREE_CODE (field
) != FIELD_DECL
)
5602 if (!DECL_BIT_FIELD_TYPE (field
))
5609 if (TREE_CODE (type
) == UNION_TYPE
)
5613 /* Unions can be returned in registers if every element is
5614 integral, or can be returned in an integer register. */
5615 for (field
= TYPE_FIELDS (type
);
5617 field
= DECL_CHAIN (field
))
5619 if (TREE_CODE (field
) != FIELD_DECL
)
5622 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5625 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5631 #endif /* not ARM_WINCE */
5633 /* Return all other types in memory. */
5637 const struct pcs_attribute_arg
5641 } pcs_attribute_args
[] =
5643 {"aapcs", ARM_PCS_AAPCS
},
5644 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5646 /* We could recognize these, but changes would be needed elsewhere
5647 * to implement them. */
5648 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5649 {"atpcs", ARM_PCS_ATPCS
},
5650 {"apcs", ARM_PCS_APCS
},
5652 {NULL
, ARM_PCS_UNKNOWN
}
5656 arm_pcs_from_attribute (tree attr
)
5658 const struct pcs_attribute_arg
*ptr
;
5661 /* Get the value of the argument. */
5662 if (TREE_VALUE (attr
) == NULL_TREE
5663 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5664 return ARM_PCS_UNKNOWN
;
5666 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5668 /* Check it against the list of known arguments. */
5669 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5670 if (streq (arg
, ptr
->arg
))
5673 /* An unrecognized interrupt type. */
5674 return ARM_PCS_UNKNOWN
;
5677 /* Get the PCS variant to use for this call. TYPE is the function's type
5678 specification, DECL is the specific declartion. DECL may be null if
5679 the call could be indirect or if this is a library call. */
5681 arm_get_pcs_model (const_tree type
, const_tree decl
)
5683 bool user_convention
= false;
5684 enum arm_pcs user_pcs
= arm_pcs_default
;
5689 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5692 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5693 user_convention
= true;
5696 if (TARGET_AAPCS_BASED
)
5698 /* Detect varargs functions. These always use the base rules
5699 (no argument is ever a candidate for a co-processor
5701 bool base_rules
= stdarg_p (type
);
5703 if (user_convention
)
5705 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5706 sorry ("non-AAPCS derived PCS variant");
5707 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5708 error ("variadic functions must use the base AAPCS variant");
5712 return ARM_PCS_AAPCS
;
5713 else if (user_convention
)
5715 else if (decl
&& flag_unit_at_a_time
)
5717 /* Local functions never leak outside this compilation unit,
5718 so we are free to use whatever conventions are
5720 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5721 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5723 return ARM_PCS_AAPCS_LOCAL
;
5726 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5727 sorry ("PCS variant");
5729 /* For everything else we use the target's default. */
5730 return arm_pcs_default
;
5735 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5736 const_tree fntype ATTRIBUTE_UNUSED
,
5737 rtx libcall ATTRIBUTE_UNUSED
,
5738 const_tree fndecl ATTRIBUTE_UNUSED
)
5740 /* Record the unallocated VFP registers. */
5741 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5742 pcum
->aapcs_vfp_reg_alloc
= 0;
5745 /* Walk down the type tree of TYPE counting consecutive base elements.
5746 If *MODEP is VOIDmode, then set it to the first valid floating point
5747 type. If a non-floating point type is found, or if a floating point
5748 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5749 otherwise return the count in the sub-tree. */
5751 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5756 switch (TREE_CODE (type
))
5759 mode
= TYPE_MODE (type
);
5760 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5763 if (*modep
== VOIDmode
)
5772 mode
= TYPE_MODE (TREE_TYPE (type
));
5773 if (mode
!= DFmode
&& mode
!= SFmode
)
5776 if (*modep
== VOIDmode
)
5785 /* Use V2SImode and V4SImode as representatives of all 64-bit
5786 and 128-bit vector types, whether or not those modes are
5787 supported with the present options. */
5788 size
= int_size_in_bytes (type
);
5801 if (*modep
== VOIDmode
)
5804 /* Vector modes are considered to be opaque: two vectors are
5805 equivalent for the purposes of being homogeneous aggregates
5806 if they are the same size. */
5815 tree index
= TYPE_DOMAIN (type
);
5817 /* Can't handle incomplete types nor sizes that are not
5819 if (!COMPLETE_TYPE_P (type
)
5820 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5823 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5826 || !TYPE_MAX_VALUE (index
)
5827 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5828 || !TYPE_MIN_VALUE (index
)
5829 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5833 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5834 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5836 /* There must be no padding. */
5837 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5849 /* Can't handle incomplete types nor sizes that are not
5851 if (!COMPLETE_TYPE_P (type
)
5852 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5855 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5857 if (TREE_CODE (field
) != FIELD_DECL
)
5860 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5866 /* There must be no padding. */
5867 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5874 case QUAL_UNION_TYPE
:
5876 /* These aren't very interesting except in a degenerate case. */
5881 /* Can't handle incomplete types nor sizes that are not
5883 if (!COMPLETE_TYPE_P (type
)
5884 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5887 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5889 if (TREE_CODE (field
) != FIELD_DECL
)
5892 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5895 count
= count
> sub_count
? count
: sub_count
;
5898 /* There must be no padding. */
5899 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5912 /* Return true if PCS_VARIANT should use VFP registers. */
5914 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5916 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5918 static bool seen_thumb1_vfp
= false;
5920 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5922 sorry ("Thumb-1 hard-float VFP ABI");
5923 /* sorry() is not immediately fatal, so only display this once. */
5924 seen_thumb1_vfp
= true;
5930 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5933 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
5934 (TARGET_VFP_DOUBLE
|| !is_double
));
5937 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5938 suitable for passing or returning in VFP registers for the PCS
5939 variant selected. If it is, then *BASE_MODE is updated to contain
5940 a machine mode describing each element of the argument's type and
5941 *COUNT to hold the number of such elements. */
5943 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5944 machine_mode mode
, const_tree type
,
5945 machine_mode
*base_mode
, int *count
)
5947 machine_mode new_mode
= VOIDmode
;
5949 /* If we have the type information, prefer that to working things
5950 out from the mode. */
5953 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5955 if (ag_count
> 0 && ag_count
<= 4)
5960 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5961 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5962 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5967 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5970 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5976 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5979 *base_mode
= new_mode
;
5984 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5985 machine_mode mode
, const_tree type
)
5987 int count ATTRIBUTE_UNUSED
;
5988 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5990 if (!use_vfp_abi (pcs_variant
, false))
5992 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5997 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6000 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6003 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6004 &pcum
->aapcs_vfp_rmode
,
6005 &pcum
->aapcs_vfp_rcount
);
6008 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6009 for the behaviour of this function. */
6012 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6013 const_tree type ATTRIBUTE_UNUSED
)
6016 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6017 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6018 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6021 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6022 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6024 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6026 || (mode
== TImode
&& ! TARGET_NEON
)
6027 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6030 int rcount
= pcum
->aapcs_vfp_rcount
;
6032 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6036 /* Avoid using unsupported vector modes. */
6037 if (rmode
== V2SImode
)
6039 else if (rmode
== V4SImode
)
6046 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6047 for (i
= 0; i
< rcount
; i
++)
6049 rtx tmp
= gen_rtx_REG (rmode
,
6050 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6051 tmp
= gen_rtx_EXPR_LIST
6053 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6054 XVECEXP (par
, 0, i
) = tmp
;
6057 pcum
->aapcs_reg
= par
;
6060 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6066 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6067 comment there for the behaviour of this function. */
6070 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6072 const_tree type ATTRIBUTE_UNUSED
)
6074 if (!use_vfp_abi (pcs_variant
, false))
6078 || (GET_MODE_CLASS (mode
) == MODE_INT
6079 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6083 machine_mode ag_mode
;
6088 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6093 if (ag_mode
== V2SImode
)
6095 else if (ag_mode
== V4SImode
)
6101 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6102 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6103 for (i
= 0; i
< count
; i
++)
6105 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6106 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6107 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6108 XVECEXP (par
, 0, i
) = tmp
;
6114 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6118 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6119 machine_mode mode ATTRIBUTE_UNUSED
,
6120 const_tree type ATTRIBUTE_UNUSED
)
6122 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6123 pcum
->aapcs_vfp_reg_alloc
= 0;
6127 #define AAPCS_CP(X) \
6129 aapcs_ ## X ## _cum_init, \
6130 aapcs_ ## X ## _is_call_candidate, \
6131 aapcs_ ## X ## _allocate, \
6132 aapcs_ ## X ## _is_return_candidate, \
6133 aapcs_ ## X ## _allocate_return_reg, \
6134 aapcs_ ## X ## _advance \
6137 /* Table of co-processors that can be used to pass arguments in
6138 registers. Idealy no arugment should be a candidate for more than
6139 one co-processor table entry, but the table is processed in order
6140 and stops after the first match. If that entry then fails to put
6141 the argument into a co-processor register, the argument will go on
6145 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6146 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6148 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6149 BLKmode) is a candidate for this co-processor's registers; this
6150 function should ignore any position-dependent state in
6151 CUMULATIVE_ARGS and only use call-type dependent information. */
6152 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6154 /* Return true if the argument does get a co-processor register; it
6155 should set aapcs_reg to an RTX of the register allocated as is
6156 required for a return from FUNCTION_ARG. */
6157 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6159 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6160 be returned in this co-processor's registers. */
6161 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6163 /* Allocate and return an RTX element to hold the return type of a call. This
6164 routine must not fail and will only be called if is_return_candidate
6165 returned true with the same parameters. */
6166 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6168 /* Finish processing this argument and prepare to start processing
6170 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6171 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6179 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6184 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6185 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6192 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6194 /* We aren't passed a decl, so we can't check that a call is local.
6195 However, it isn't clear that that would be a win anyway, since it
6196 might limit some tail-calling opportunities. */
6197 enum arm_pcs pcs_variant
;
6201 const_tree fndecl
= NULL_TREE
;
6203 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6206 fntype
= TREE_TYPE (fntype
);
6209 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6212 pcs_variant
= arm_pcs_default
;
6214 if (pcs_variant
!= ARM_PCS_AAPCS
)
6218 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6219 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6228 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6231 /* We aren't passed a decl, so we can't check that a call is local.
6232 However, it isn't clear that that would be a win anyway, since it
6233 might limit some tail-calling opportunities. */
6234 enum arm_pcs pcs_variant
;
6235 int unsignedp ATTRIBUTE_UNUSED
;
6239 const_tree fndecl
= NULL_TREE
;
6241 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6244 fntype
= TREE_TYPE (fntype
);
6247 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6250 pcs_variant
= arm_pcs_default
;
6252 /* Promote integer types. */
6253 if (type
&& INTEGRAL_TYPE_P (type
))
6254 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6256 if (pcs_variant
!= ARM_PCS_AAPCS
)
6260 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6261 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6263 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6267 /* Promotes small structs returned in a register to full-word size
6268 for big-endian AAPCS. */
6269 if (type
&& arm_return_in_msb (type
))
6271 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6272 if (size
% UNITS_PER_WORD
!= 0)
6274 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6275 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
6279 return gen_rtx_REG (mode
, R0_REGNUM
);
6283 aapcs_libcall_value (machine_mode mode
)
6285 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6286 && GET_MODE_SIZE (mode
) <= 4)
6289 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6292 /* Lay out a function argument using the AAPCS rules. The rule
6293 numbers referred to here are those in the AAPCS. */
6295 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6296 const_tree type
, bool named
)
6301 /* We only need to do this once per argument. */
6302 if (pcum
->aapcs_arg_processed
)
6305 pcum
->aapcs_arg_processed
= true;
6307 /* Special case: if named is false then we are handling an incoming
6308 anonymous argument which is on the stack. */
6312 /* Is this a potential co-processor register candidate? */
6313 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6315 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6316 pcum
->aapcs_cprc_slot
= slot
;
6318 /* We don't have to apply any of the rules from part B of the
6319 preparation phase, these are handled elsewhere in the
6324 /* A Co-processor register candidate goes either in its own
6325 class of registers or on the stack. */
6326 if (!pcum
->aapcs_cprc_failed
[slot
])
6328 /* C1.cp - Try to allocate the argument to co-processor
6330 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6333 /* C2.cp - Put the argument on the stack and note that we
6334 can't assign any more candidates in this slot. We also
6335 need to note that we have allocated stack space, so that
6336 we won't later try to split a non-cprc candidate between
6337 core registers and the stack. */
6338 pcum
->aapcs_cprc_failed
[slot
] = true;
6339 pcum
->can_split
= false;
6342 /* We didn't get a register, so this argument goes on the
6344 gcc_assert (pcum
->can_split
== false);
6349 /* C3 - For double-word aligned arguments, round the NCRN up to the
6350 next even number. */
6351 ncrn
= pcum
->aapcs_ncrn
;
6352 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
6355 nregs
= ARM_NUM_REGS2(mode
, type
);
6357 /* Sigh, this test should really assert that nregs > 0, but a GCC
6358 extension allows empty structs and then gives them empty size; it
6359 then allows such a structure to be passed by value. For some of
6360 the code below we have to pretend that such an argument has
6361 non-zero size so that we 'locate' it correctly either in
6362 registers or on the stack. */
6363 gcc_assert (nregs
>= 0);
6365 nregs2
= nregs
? nregs
: 1;
6367 /* C4 - Argument fits entirely in core registers. */
6368 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6370 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6371 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6375 /* C5 - Some core registers left and there are no arguments already
6376 on the stack: split this argument between the remaining core
6377 registers and the stack. */
6378 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6380 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6381 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6382 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6386 /* C6 - NCRN is set to 4. */
6387 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6389 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6393 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6394 for a call to a function whose data type is FNTYPE.
6395 For a library call, FNTYPE is NULL. */
6397 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6399 tree fndecl ATTRIBUTE_UNUSED
)
6401 /* Long call handling. */
6403 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6405 pcum
->pcs_variant
= arm_pcs_default
;
6407 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6409 if (arm_libcall_uses_aapcs_base (libname
))
6410 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6412 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6413 pcum
->aapcs_reg
= NULL_RTX
;
6414 pcum
->aapcs_partial
= 0;
6415 pcum
->aapcs_arg_processed
= false;
6416 pcum
->aapcs_cprc_slot
= -1;
6417 pcum
->can_split
= true;
6419 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6423 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6425 pcum
->aapcs_cprc_failed
[i
] = false;
6426 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6434 /* On the ARM, the offset starts at 0. */
6436 pcum
->iwmmxt_nregs
= 0;
6437 pcum
->can_split
= true;
6439 /* Varargs vectors are treated the same as long long.
6440 named_count avoids having to change the way arm handles 'named' */
6441 pcum
->named_count
= 0;
6444 if (TARGET_REALLY_IWMMXT
&& fntype
)
6448 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6450 fn_arg
= TREE_CHAIN (fn_arg
))
6451 pcum
->named_count
+= 1;
6453 if (! pcum
->named_count
)
6454 pcum
->named_count
= INT_MAX
;
6458 /* Return true if mode/type need doubleword alignment. */
6460 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6463 return PARM_BOUNDARY
< GET_MODE_ALIGNMENT (mode
);
6465 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6466 if (!AGGREGATE_TYPE_P (type
))
6467 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6469 /* Array types: Use member alignment of element type. */
6470 if (TREE_CODE (type
) == ARRAY_TYPE
)
6471 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6473 /* Record/aggregate types: Use greatest member alignment of any member. */
6474 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6475 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6482 /* Determine where to put an argument to a function.
6483 Value is zero to push the argument on the stack,
6484 or a hard register in which to store the argument.
6486 MODE is the argument's machine mode.
6487 TYPE is the data type of the argument (as a tree).
6488 This is null for libcalls where that information may
6490 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6491 the preceding args and about the function being called.
6492 NAMED is nonzero if this argument is a named parameter
6493 (otherwise it is an extra parameter matching an ellipsis).
6495 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6496 other arguments are passed on the stack. If (NAMED == 0) (which happens
6497 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6498 defined), say it is passed in the stack (function_prologue will
6499 indeed make it pass in the stack if necessary). */
6502 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6503 const_tree type
, bool named
)
6505 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6508 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6509 a call insn (op3 of a call_value insn). */
6510 if (mode
== VOIDmode
)
6513 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6515 aapcs_layout_arg (pcum
, mode
, type
, named
);
6516 return pcum
->aapcs_reg
;
6519 /* Varargs vectors are treated the same as long long.
6520 named_count avoids having to change the way arm handles 'named' */
6521 if (TARGET_IWMMXT_ABI
6522 && arm_vector_mode_supported_p (mode
)
6523 && pcum
->named_count
> pcum
->nargs
+ 1)
6525 if (pcum
->iwmmxt_nregs
<= 9)
6526 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6529 pcum
->can_split
= false;
6534 /* Put doubleword aligned quantities in even register pairs. */
6536 && ARM_DOUBLEWORD_ALIGN
6537 && arm_needs_doubleword_align (mode
, type
))
6540 /* Only allow splitting an arg between regs and memory if all preceding
6541 args were allocated to regs. For args passed by reference we only count
6542 the reference pointer. */
6543 if (pcum
->can_split
)
6546 nregs
= ARM_NUM_REGS2 (mode
, type
);
6548 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6551 return gen_rtx_REG (mode
, pcum
->nregs
);
6555 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6557 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
6558 ? DOUBLEWORD_ALIGNMENT
6563 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6564 tree type
, bool named
)
6566 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6567 int nregs
= pcum
->nregs
;
6569 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6571 aapcs_layout_arg (pcum
, mode
, type
, named
);
6572 return pcum
->aapcs_partial
;
6575 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6578 if (NUM_ARG_REGS
> nregs
6579 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6581 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6586 /* Update the data in PCUM to advance over an argument
6587 of mode MODE and data type TYPE.
6588 (TYPE is null for libcalls where that information may not be available.) */
6591 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6592 const_tree type
, bool named
)
6594 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6596 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6598 aapcs_layout_arg (pcum
, mode
, type
, named
);
6600 if (pcum
->aapcs_cprc_slot
>= 0)
6602 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6604 pcum
->aapcs_cprc_slot
= -1;
6607 /* Generic stuff. */
6608 pcum
->aapcs_arg_processed
= false;
6609 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6610 pcum
->aapcs_reg
= NULL_RTX
;
6611 pcum
->aapcs_partial
= 0;
6616 if (arm_vector_mode_supported_p (mode
)
6617 && pcum
->named_count
> pcum
->nargs
6618 && TARGET_IWMMXT_ABI
)
6619 pcum
->iwmmxt_nregs
+= 1;
6621 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6625 /* Variable sized types are passed by reference. This is a GCC
6626 extension to the ARM ABI. */
6629 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6630 machine_mode mode ATTRIBUTE_UNUSED
,
6631 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6633 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6636 /* Encode the current state of the #pragma [no_]long_calls. */
6639 OFF
, /* No #pragma [no_]long_calls is in effect. */
6640 LONG
, /* #pragma long_calls is in effect. */
6641 SHORT
/* #pragma no_long_calls is in effect. */
6644 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6647 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6649 arm_pragma_long_calls
= LONG
;
6653 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6655 arm_pragma_long_calls
= SHORT
;
6659 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6661 arm_pragma_long_calls
= OFF
;
6664 /* Handle an attribute requiring a FUNCTION_DECL;
6665 arguments as in struct attribute_spec.handler. */
6667 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6668 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6670 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6672 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6674 *no_add_attrs
= true;
6680 /* Handle an "interrupt" or "isr" attribute;
6681 arguments as in struct attribute_spec.handler. */
6683 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6688 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6690 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6692 *no_add_attrs
= true;
6694 /* FIXME: the argument if any is checked for type attributes;
6695 should it be checked for decl ones? */
6699 if (TREE_CODE (*node
) == FUNCTION_TYPE
6700 || TREE_CODE (*node
) == METHOD_TYPE
)
6702 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6704 warning (OPT_Wattributes
, "%qE attribute ignored",
6706 *no_add_attrs
= true;
6709 else if (TREE_CODE (*node
) == POINTER_TYPE
6710 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6711 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6712 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6714 *node
= build_variant_type_copy (*node
);
6715 TREE_TYPE (*node
) = build_type_attribute_variant
6717 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6718 *no_add_attrs
= true;
6722 /* Possibly pass this attribute on from the type to a decl. */
6723 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6724 | (int) ATTR_FLAG_FUNCTION_NEXT
6725 | (int) ATTR_FLAG_ARRAY_NEXT
))
6727 *no_add_attrs
= true;
6728 return tree_cons (name
, args
, NULL_TREE
);
6732 warning (OPT_Wattributes
, "%qE attribute ignored",
6741 /* Handle a "pcs" attribute; arguments as in struct
6742 attribute_spec.handler. */
6744 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6745 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6747 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6749 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6750 *no_add_attrs
= true;
6755 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6756 /* Handle the "notshared" attribute. This attribute is another way of
6757 requesting hidden visibility. ARM's compiler supports
6758 "__declspec(notshared)"; we support the same thing via an
6762 arm_handle_notshared_attribute (tree
*node
,
6763 tree name ATTRIBUTE_UNUSED
,
6764 tree args ATTRIBUTE_UNUSED
,
6765 int flags ATTRIBUTE_UNUSED
,
6768 tree decl
= TYPE_NAME (*node
);
6772 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6773 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6774 *no_add_attrs
= false;
6780 /* This function returns true if a function with declaration FNDECL and type
6781 FNTYPE uses the stack to pass arguments or return variables and false
6782 otherwise. This is used for functions with the attributes
6783 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6784 diagnostic messages if the stack is used. NAME is the name of the attribute
6788 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
6790 function_args_iterator args_iter
;
6791 CUMULATIVE_ARGS args_so_far_v
;
6792 cumulative_args_t args_so_far
;
6793 bool first_param
= true;
6794 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
6796 /* Error out if any argument is passed on the stack. */
6797 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
6798 args_so_far
= pack_cumulative_args (&args_so_far_v
);
6799 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
6802 machine_mode arg_mode
= TYPE_MODE (arg_type
);
6804 prev_arg_type
= arg_type
;
6805 if (VOID_TYPE_P (arg_type
))
6809 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
, true);
6810 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
, true);
6812 || arm_arg_partial_bytes (args_so_far
, arg_mode
, arg_type
, true))
6814 error ("%qE attribute not available to functions with arguments "
6815 "passed on the stack", name
);
6818 first_param
= false;
6821 /* Error out for variadic functions since we cannot control how many
6822 arguments will be passed and thus stack could be used. stdarg_p () is not
6823 used for the checking to avoid browsing arguments twice. */
6824 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
6826 error ("%qE attribute not available to functions with variable number "
6827 "of arguments", name
);
6831 /* Error out if return value is passed on the stack. */
6832 ret_type
= TREE_TYPE (fntype
);
6833 if (arm_return_in_memory (ret_type
, fntype
))
6835 error ("%qE attribute not available to functions that return value on "
6842 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6843 function will check whether the attribute is allowed here and will add the
6844 attribute to the function declaration tree or otherwise issue a warning. */
6847 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
6856 *no_add_attrs
= true;
6857 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6862 /* Ignore attribute for function types. */
6863 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6865 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6867 *no_add_attrs
= true;
6873 /* Warn for static linkage functions. */
6874 if (!TREE_PUBLIC (fndecl
))
6876 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
6877 "with static linkage", name
);
6878 *no_add_attrs
= true;
6882 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
6883 TREE_TYPE (fndecl
));
6888 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6889 function will check whether the attribute is allowed here and will add the
6890 attribute to the function type tree or otherwise issue a diagnostic. The
6891 reason we check this at declaration time is to only allow the use of the
6892 attribute with declarations of function pointers and not function
6893 declarations. This function checks NODE is of the expected type and issues
6894 diagnostics otherwise using NAME. If it is not of the expected type
6895 *NO_ADD_ATTRS will be set to true. */
6898 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
6903 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
6908 *no_add_attrs
= true;
6909 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6914 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
6917 fntype
= TREE_TYPE (decl
);
6920 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
6921 fntype
= TREE_TYPE (fntype
);
6923 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
6925 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
6926 "function pointer", name
);
6927 *no_add_attrs
= true;
6931 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
6936 /* Prevent trees being shared among function types with and without
6937 cmse_nonsecure_call attribute. */
6938 type
= TREE_TYPE (decl
);
6940 type
= build_distinct_type_copy (type
);
6941 TREE_TYPE (decl
) = type
;
6944 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
6947 fntype
= TREE_TYPE (fntype
);
6948 fntype
= build_distinct_type_copy (fntype
);
6949 TREE_TYPE (type
) = fntype
;
6952 /* Construct a type attribute and add it to the function type. */
6953 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
6954 TYPE_ATTRIBUTES (fntype
));
6955 TYPE_ATTRIBUTES (fntype
) = attrs
;
6959 /* Return 0 if the attributes for two types are incompatible, 1 if they
6960 are compatible, and 2 if they are nearly compatible (which causes a
6961 warning to be generated). */
6963 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6967 /* Check for mismatch of non-default calling convention. */
6968 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6971 /* Check for mismatched call attributes. */
6972 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6973 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6974 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6975 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6977 /* Only bother to check if an attribute is defined. */
6978 if (l1
| l2
| s1
| s2
)
6980 /* If one type has an attribute, the other must have the same attribute. */
6981 if ((l1
!= l2
) || (s1
!= s2
))
6984 /* Disallow mixed attributes. */
6985 if ((l1
& s2
) || (l2
& s1
))
6989 /* Check for mismatched ISR attribute. */
6990 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6992 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6993 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6995 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6999 l1
= lookup_attribute ("cmse_nonsecure_call",
7000 TYPE_ATTRIBUTES (type1
)) != NULL
;
7001 l2
= lookup_attribute ("cmse_nonsecure_call",
7002 TYPE_ATTRIBUTES (type2
)) != NULL
;
7010 /* Assigns default attributes to newly defined type. This is used to
7011 set short_call/long_call attributes for function types of
7012 functions defined inside corresponding #pragma scopes. */
7014 arm_set_default_type_attributes (tree type
)
7016 /* Add __attribute__ ((long_call)) to all functions, when
7017 inside #pragma long_calls or __attribute__ ((short_call)),
7018 when inside #pragma no_long_calls. */
7019 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7021 tree type_attr_list
, attr_name
;
7022 type_attr_list
= TYPE_ATTRIBUTES (type
);
7024 if (arm_pragma_long_calls
== LONG
)
7025 attr_name
= get_identifier ("long_call");
7026 else if (arm_pragma_long_calls
== SHORT
)
7027 attr_name
= get_identifier ("short_call");
7031 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7032 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7036 /* Return true if DECL is known to be linked into section SECTION. */
7039 arm_function_in_section_p (tree decl
, section
*section
)
7041 /* We can only be certain about the prevailing symbol definition. */
7042 if (!decl_binds_to_current_def_p (decl
))
7045 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7046 if (!DECL_SECTION_NAME (decl
))
7048 /* Make sure that we will not create a unique section for DECL. */
7049 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7053 return function_section (decl
) == section
;
7056 /* Return nonzero if a 32-bit "long_call" should be generated for
7057 a call from the current function to DECL. We generate a long_call
7060 a. has an __attribute__((long call))
7061 or b. is within the scope of a #pragma long_calls
7062 or c. the -mlong-calls command line switch has been specified
7064 However we do not generate a long call if the function:
7066 d. has an __attribute__ ((short_call))
7067 or e. is inside the scope of a #pragma no_long_calls
7068 or f. is defined in the same section as the current function. */
7071 arm_is_long_call_p (tree decl
)
7076 return TARGET_LONG_CALLS
;
7078 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7079 if (lookup_attribute ("short_call", attrs
))
7082 /* For "f", be conservative, and only cater for cases in which the
7083 whole of the current function is placed in the same section. */
7084 if (!flag_reorder_blocks_and_partition
7085 && TREE_CODE (decl
) == FUNCTION_DECL
7086 && arm_function_in_section_p (decl
, current_function_section ()))
7089 if (lookup_attribute ("long_call", attrs
))
7092 return TARGET_LONG_CALLS
;
7095 /* Return nonzero if it is ok to make a tail-call to DECL. */
7097 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7099 unsigned long func_type
;
7101 if (cfun
->machine
->sibcall_blocked
)
7104 /* Never tailcall something if we are generating code for Thumb-1. */
7108 /* The PIC register is live on entry to VxWorks PLT entries, so we
7109 must make the call before restoring the PIC register. */
7110 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7113 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7114 may be used both as target of the call and base register for restoring
7115 the VFP registers */
7116 if (TARGET_APCS_FRAME
&& TARGET_ARM
7117 && TARGET_HARD_FLOAT
7118 && decl
&& arm_is_long_call_p (decl
))
7121 /* If we are interworking and the function is not declared static
7122 then we can't tail-call it unless we know that it exists in this
7123 compilation unit (since it might be a Thumb routine). */
7124 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7125 && !TREE_ASM_WRITTEN (decl
))
7128 func_type
= arm_current_func_type ();
7129 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7130 if (IS_INTERRUPT (func_type
))
7133 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7134 generated for entry functions themselves. */
7135 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7138 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7139 this would complicate matters for later code generation. */
7140 if (TREE_CODE (exp
) == CALL_EXPR
)
7142 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7143 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7147 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7149 /* Check that the return value locations are the same. For
7150 example that we aren't returning a value from the sibling in
7151 a VFP register but then need to transfer it to a core
7154 tree decl_or_type
= decl
;
7156 /* If it is an indirect function pointer, get the function type. */
7158 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7160 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7161 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7163 if (!rtx_equal_p (a
, b
))
7167 /* Never tailcall if function may be called with a misaligned SP. */
7168 if (IS_STACKALIGN (func_type
))
7171 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7172 references should become a NOP. Don't convert such calls into
7174 if (TARGET_AAPCS_BASED
7175 && arm_abi
== ARM_ABI_AAPCS
7177 && DECL_WEAK (decl
))
7180 /* We cannot do a tailcall for an indirect call by descriptor if all the
7181 argument registers are used because the only register left to load the
7182 address is IP and it will already contain the static chain. */
7183 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7185 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7186 CUMULATIVE_ARGS cum
;
7187 cumulative_args_t cum_v
;
7189 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7190 cum_v
= pack_cumulative_args (&cum
);
7192 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7194 tree type
= TREE_VALUE (t
);
7195 if (!VOID_TYPE_P (type
))
7196 arm_function_arg_advance (cum_v
, TYPE_MODE (type
), type
, true);
7199 if (!arm_function_arg (cum_v
, SImode
, integer_type_node
, true))
7203 /* Everything else is ok. */
7208 /* Addressing mode support functions. */
7210 /* Return nonzero if X is a legitimate immediate operand when compiling
7211 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7213 legitimate_pic_operand_p (rtx x
)
7215 if (GET_CODE (x
) == SYMBOL_REF
7216 || (GET_CODE (x
) == CONST
7217 && GET_CODE (XEXP (x
, 0)) == PLUS
7218 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
7224 /* Record that the current function needs a PIC register. Initialize
7225 cfun->machine->pic_reg if we have not already done so. */
7228 require_pic_register (void)
7230 /* A lot of the logic here is made obscure by the fact that this
7231 routine gets called as part of the rtx cost estimation process.
7232 We don't want those calls to affect any assumptions about the real
7233 function; and further, we can't call entry_of_function() until we
7234 start the real expansion process. */
7235 if (!crtl
->uses_pic_offset_table
)
7237 gcc_assert (can_create_pseudo_p ());
7238 if (arm_pic_register
!= INVALID_REGNUM
7239 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7241 if (!cfun
->machine
->pic_reg
)
7242 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7244 /* Play games to avoid marking the function as needing pic
7245 if we are being called as part of the cost-estimation
7247 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7248 crtl
->uses_pic_offset_table
= 1;
7252 rtx_insn
*seq
, *insn
;
7254 if (!cfun
->machine
->pic_reg
)
7255 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
7257 /* Play games to avoid marking the function as needing pic
7258 if we are being called as part of the cost-estimation
7260 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7262 crtl
->uses_pic_offset_table
= 1;
7265 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
7266 && arm_pic_register
> LAST_LO_REGNUM
)
7267 emit_move_insn (cfun
->machine
->pic_reg
,
7268 gen_rtx_REG (Pmode
, arm_pic_register
));
7270 arm_load_pic_register (0UL);
7275 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
7277 INSN_LOCATION (insn
) = prologue_location
;
7279 /* We can be called during expansion of PHI nodes, where
7280 we can't yet emit instructions directly in the final
7281 insn stream. Queue the insns on the entry edge, they will
7282 be committed after everything else is expanded. */
7283 insert_insn_on_edge (seq
,
7284 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
7291 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
7293 if (GET_CODE (orig
) == SYMBOL_REF
7294 || GET_CODE (orig
) == LABEL_REF
)
7298 gcc_assert (can_create_pseudo_p ());
7299 reg
= gen_reg_rtx (Pmode
);
7302 /* VxWorks does not impose a fixed gap between segments; the run-time
7303 gap can be different from the object-file gap. We therefore can't
7304 use GOTOFF unless we are absolutely sure that the symbol is in the
7305 same segment as the GOT. Unfortunately, the flexibility of linker
7306 scripts means that we can't be sure of that in general, so assume
7307 that GOTOFF is never valid on VxWorks. */
7308 /* References to weak symbols cannot be resolved locally: they
7309 may be overridden by a non-weak definition at link time. */
7311 if ((GET_CODE (orig
) == LABEL_REF
7312 || (GET_CODE (orig
) == SYMBOL_REF
7313 && SYMBOL_REF_LOCAL_P (orig
)
7314 && (SYMBOL_REF_DECL (orig
)
7315 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)))
7317 && arm_pic_data_is_text_relative
)
7318 insn
= arm_pic_static_addr (orig
, reg
);
7324 /* If this function doesn't have a pic register, create one now. */
7325 require_pic_register ();
7327 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
7329 /* Make the MEM as close to a constant as possible. */
7330 mem
= SET_SRC (pat
);
7331 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
7332 MEM_READONLY_P (mem
) = 1;
7333 MEM_NOTRAP_P (mem
) = 1;
7335 insn
= emit_insn (pat
);
7338 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7340 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
7344 else if (GET_CODE (orig
) == CONST
)
7348 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7349 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
7352 /* Handle the case where we have: const (UNSPEC_TLS). */
7353 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
7354 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
7357 /* Handle the case where we have:
7358 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7360 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7361 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
7362 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
7364 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
7370 gcc_assert (can_create_pseudo_p ());
7371 reg
= gen_reg_rtx (Pmode
);
7374 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
7376 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
7377 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
7378 base
== reg
? 0 : reg
);
7380 if (CONST_INT_P (offset
))
7382 /* The base register doesn't really matter, we only want to
7383 test the index for the appropriate mode. */
7384 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
7386 gcc_assert (can_create_pseudo_p ());
7387 offset
= force_reg (Pmode
, offset
);
7390 if (CONST_INT_P (offset
))
7391 return plus_constant (Pmode
, base
, INTVAL (offset
));
7394 if (GET_MODE_SIZE (mode
) > 4
7395 && (GET_MODE_CLASS (mode
) == MODE_INT
7396 || TARGET_SOFT_FLOAT
))
7398 emit_insn (gen_addsi3 (reg
, base
, offset
));
7402 return gen_rtx_PLUS (Pmode
, base
, offset
);
7409 /* Find a spare register to use during the prolog of a function. */
7412 thumb_find_work_register (unsigned long pushed_regs_mask
)
7416 /* Check the argument registers first as these are call-used. The
7417 register allocation order means that sometimes r3 might be used
7418 but earlier argument registers might not, so check them all. */
7419 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7420 if (!df_regs_ever_live_p (reg
))
7423 /* Before going on to check the call-saved registers we can try a couple
7424 more ways of deducing that r3 is available. The first is when we are
7425 pushing anonymous arguments onto the stack and we have less than 4
7426 registers worth of fixed arguments(*). In this case r3 will be part of
7427 the variable argument list and so we can be sure that it will be
7428 pushed right at the start of the function. Hence it will be available
7429 for the rest of the prologue.
7430 (*): ie crtl->args.pretend_args_size is greater than 0. */
7431 if (cfun
->machine
->uses_anonymous_args
7432 && crtl
->args
.pretend_args_size
> 0)
7433 return LAST_ARG_REGNUM
;
7435 /* The other case is when we have fixed arguments but less than 4 registers
7436 worth. In this case r3 might be used in the body of the function, but
7437 it is not being used to convey an argument into the function. In theory
7438 we could just check crtl->args.size to see how many bytes are
7439 being passed in argument registers, but it seems that it is unreliable.
7440 Sometimes it will have the value 0 when in fact arguments are being
7441 passed. (See testcase execute/20021111-1.c for an example). So we also
7442 check the args_info.nregs field as well. The problem with this field is
7443 that it makes no allowances for arguments that are passed to the
7444 function but which are not used. Hence we could miss an opportunity
7445 when a function has an unused argument in r3. But it is better to be
7446 safe than to be sorry. */
7447 if (! cfun
->machine
->uses_anonymous_args
7448 && crtl
->args
.size
>= 0
7449 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7450 && (TARGET_AAPCS_BASED
7451 ? crtl
->args
.info
.aapcs_ncrn
< 4
7452 : crtl
->args
.info
.nregs
< 4))
7453 return LAST_ARG_REGNUM
;
7455 /* Otherwise look for a call-saved register that is going to be pushed. */
7456 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7457 if (pushed_regs_mask
& (1 << reg
))
7462 /* Thumb-2 can use high regs. */
7463 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7464 if (pushed_regs_mask
& (1 << reg
))
7467 /* Something went wrong - thumb_compute_save_reg_mask()
7468 should have arranged for a suitable register to be pushed. */
7472 static GTY(()) int pic_labelno
;
7474 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7478 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7480 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7482 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7485 gcc_assert (flag_pic
);
7487 pic_reg
= cfun
->machine
->pic_reg
;
7488 if (TARGET_VXWORKS_RTP
)
7490 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7491 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7492 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7494 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7496 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7497 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7501 /* We use an UNSPEC rather than a LABEL_REF because this label
7502 never appears in the code stream. */
7504 labelno
= GEN_INT (pic_labelno
++);
7505 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7506 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7508 /* On the ARM the PC register contains 'dot + 8' at the time of the
7509 addition, on the Thumb it is 'dot + 4'. */
7510 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7511 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7513 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7517 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7519 else /* TARGET_THUMB1 */
7521 if (arm_pic_register
!= INVALID_REGNUM
7522 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7524 /* We will have pushed the pic register, so we should always be
7525 able to find a work register. */
7526 pic_tmp
= gen_rtx_REG (SImode
,
7527 thumb_find_work_register (saved_regs
));
7528 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7529 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7530 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7532 else if (arm_pic_register
!= INVALID_REGNUM
7533 && arm_pic_register
> LAST_LO_REGNUM
7534 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7536 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7537 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7538 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7541 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7545 /* Need to emit this whether or not we obey regdecls,
7546 since setjmp/longjmp can cause life info to screw up. */
7550 /* Generate code to load the address of a static var when flag_pic is set. */
7552 arm_pic_static_addr (rtx orig
, rtx reg
)
7554 rtx l1
, labelno
, offset_rtx
;
7556 gcc_assert (flag_pic
);
7558 /* We use an UNSPEC rather than a LABEL_REF because this label
7559 never appears in the code stream. */
7560 labelno
= GEN_INT (pic_labelno
++);
7561 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7562 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7564 /* On the ARM the PC register contains 'dot + 8' at the time of the
7565 addition, on the Thumb it is 'dot + 4'. */
7566 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7567 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7568 UNSPEC_SYMBOL_OFFSET
);
7569 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7571 return emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7574 /* Return nonzero if X is valid as an ARM state addressing register. */
7576 arm_address_register_rtx_p (rtx x
, int strict_p
)
7586 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7588 return (regno
<= LAST_ARM_REGNUM
7589 || regno
>= FIRST_PSEUDO_REGISTER
7590 || regno
== FRAME_POINTER_REGNUM
7591 || regno
== ARG_POINTER_REGNUM
);
7594 /* Return TRUE if this rtx is the difference of a symbol and a label,
7595 and will reduce to a PC-relative relocation in the object file.
7596 Expressions like this can be left alone when generating PIC, rather
7597 than forced through the GOT. */
7599 pcrel_constant_p (rtx x
)
7601 if (GET_CODE (x
) == MINUS
)
7602 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7607 /* Return true if X will surely end up in an index register after next
7610 will_be_in_index_register (const_rtx x
)
7612 /* arm.md: calculate_pic_address will split this into a register. */
7613 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7616 /* Return nonzero if X is a valid ARM state address operand. */
7618 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7622 enum rtx_code code
= GET_CODE (x
);
7624 if (arm_address_register_rtx_p (x
, strict_p
))
7627 use_ldrd
= (TARGET_LDRD
7628 && (mode
== DImode
|| mode
== DFmode
));
7630 if (code
== POST_INC
|| code
== PRE_DEC
7631 || ((code
== PRE_INC
|| code
== POST_DEC
)
7632 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7633 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7635 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7636 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7637 && GET_CODE (XEXP (x
, 1)) == PLUS
7638 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7640 rtx addend
= XEXP (XEXP (x
, 1), 1);
7642 /* Don't allow ldrd post increment by register because it's hard
7643 to fixup invalid register choices. */
7645 && GET_CODE (x
) == POST_MODIFY
7649 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7650 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7653 /* After reload constants split into minipools will have addresses
7654 from a LABEL_REF. */
7655 else if (reload_completed
7656 && (code
== LABEL_REF
7658 && GET_CODE (XEXP (x
, 0)) == PLUS
7659 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7660 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7663 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7666 else if (code
== PLUS
)
7668 rtx xop0
= XEXP (x
, 0);
7669 rtx xop1
= XEXP (x
, 1);
7671 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7672 && ((CONST_INT_P (xop1
)
7673 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7674 || (!strict_p
&& will_be_in_index_register (xop1
))))
7675 || (arm_address_register_rtx_p (xop1
, strict_p
)
7676 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7680 /* Reload currently can't handle MINUS, so disable this for now */
7681 else if (GET_CODE (x
) == MINUS
)
7683 rtx xop0
= XEXP (x
, 0);
7684 rtx xop1
= XEXP (x
, 1);
7686 return (arm_address_register_rtx_p (xop0
, strict_p
)
7687 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7691 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7692 && code
== SYMBOL_REF
7693 && CONSTANT_POOL_ADDRESS_P (x
)
7695 && symbol_mentioned_p (get_pool_constant (x
))
7696 && ! pcrel_constant_p (get_pool_constant (x
))))
7702 /* Return nonzero if X is a valid Thumb-2 address operand. */
7704 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7707 enum rtx_code code
= GET_CODE (x
);
7709 if (arm_address_register_rtx_p (x
, strict_p
))
7712 use_ldrd
= (TARGET_LDRD
7713 && (mode
== DImode
|| mode
== DFmode
));
7715 if (code
== POST_INC
|| code
== PRE_DEC
7716 || ((code
== PRE_INC
|| code
== POST_DEC
)
7717 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7718 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7720 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7721 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7722 && GET_CODE (XEXP (x
, 1)) == PLUS
7723 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7725 /* Thumb-2 only has autoincrement by constant. */
7726 rtx addend
= XEXP (XEXP (x
, 1), 1);
7727 HOST_WIDE_INT offset
;
7729 if (!CONST_INT_P (addend
))
7732 offset
= INTVAL(addend
);
7733 if (GET_MODE_SIZE (mode
) <= 4)
7734 return (offset
> -256 && offset
< 256);
7736 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7737 && (offset
& 3) == 0);
7740 /* After reload constants split into minipools will have addresses
7741 from a LABEL_REF. */
7742 else if (reload_completed
7743 && (code
== LABEL_REF
7745 && GET_CODE (XEXP (x
, 0)) == PLUS
7746 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7747 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7750 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7753 else if (code
== PLUS
)
7755 rtx xop0
= XEXP (x
, 0);
7756 rtx xop1
= XEXP (x
, 1);
7758 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7759 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7760 || (!strict_p
&& will_be_in_index_register (xop1
))))
7761 || (arm_address_register_rtx_p (xop1
, strict_p
)
7762 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7765 /* Normally we can assign constant values to target registers without
7766 the help of constant pool. But there are cases we have to use constant
7768 1) assign a label to register.
7769 2) sign-extend a 8bit value to 32bit and then assign to register.
7771 Constant pool access in format:
7772 (set (reg r0) (mem (symbol_ref (".LC0"))))
7773 will cause the use of literal pool (later in function arm_reorg).
7774 So here we mark such format as an invalid format, then the compiler
7775 will adjust it into:
7776 (set (reg r0) (symbol_ref (".LC0")))
7777 (set (reg r0) (mem (reg r0))).
7778 No extra register is required, and (mem (reg r0)) won't cause the use
7779 of literal pools. */
7780 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
7781 && CONSTANT_POOL_ADDRESS_P (x
))
7784 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7785 && code
== SYMBOL_REF
7786 && CONSTANT_POOL_ADDRESS_P (x
)
7788 && symbol_mentioned_p (get_pool_constant (x
))
7789 && ! pcrel_constant_p (get_pool_constant (x
))))
7795 /* Return nonzero if INDEX is valid for an address index operand in
7798 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7801 HOST_WIDE_INT range
;
7802 enum rtx_code code
= GET_CODE (index
);
7804 /* Standard coprocessor addressing modes. */
7805 if (TARGET_HARD_FLOAT
7806 && (mode
== SFmode
|| mode
== DFmode
))
7807 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7808 && INTVAL (index
) > -1024
7809 && (INTVAL (index
) & 3) == 0);
7811 /* For quad modes, we restrict the constant offset to be slightly less
7812 than what the instruction format permits. We do this because for
7813 quad mode moves, we will actually decompose them into two separate
7814 double-mode reads or writes. INDEX must therefore be a valid
7815 (double-mode) offset and so should INDEX+8. */
7816 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7817 return (code
== CONST_INT
7818 && INTVAL (index
) < 1016
7819 && INTVAL (index
) > -1024
7820 && (INTVAL (index
) & 3) == 0);
7822 /* We have no such constraint on double mode offsets, so we permit the
7823 full range of the instruction format. */
7824 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7825 return (code
== CONST_INT
7826 && INTVAL (index
) < 1024
7827 && INTVAL (index
) > -1024
7828 && (INTVAL (index
) & 3) == 0);
7830 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7831 return (code
== CONST_INT
7832 && INTVAL (index
) < 1024
7833 && INTVAL (index
) > -1024
7834 && (INTVAL (index
) & 3) == 0);
7836 if (arm_address_register_rtx_p (index
, strict_p
)
7837 && (GET_MODE_SIZE (mode
) <= 4))
7840 if (mode
== DImode
|| mode
== DFmode
)
7842 if (code
== CONST_INT
)
7844 HOST_WIDE_INT val
= INTVAL (index
);
7847 return val
> -256 && val
< 256;
7849 return val
> -4096 && val
< 4092;
7852 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7855 if (GET_MODE_SIZE (mode
) <= 4
7859 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7863 rtx xiop0
= XEXP (index
, 0);
7864 rtx xiop1
= XEXP (index
, 1);
7866 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7867 && power_of_two_operand (xiop1
, SImode
))
7868 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7869 && power_of_two_operand (xiop0
, SImode
)));
7871 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7872 || code
== ASHIFT
|| code
== ROTATERT
)
7874 rtx op
= XEXP (index
, 1);
7876 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7879 && INTVAL (op
) <= 31);
7883 /* For ARM v4 we may be doing a sign-extend operation during the
7889 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7895 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7897 return (code
== CONST_INT
7898 && INTVAL (index
) < range
7899 && INTVAL (index
) > -range
);
7902 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7903 index operand. i.e. 1, 2, 4 or 8. */
7905 thumb2_index_mul_operand (rtx op
)
7909 if (!CONST_INT_P (op
))
7913 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7916 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7918 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7920 enum rtx_code code
= GET_CODE (index
);
7922 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7923 /* Standard coprocessor addressing modes. */
7924 if (TARGET_HARD_FLOAT
7925 && (mode
== SFmode
|| mode
== DFmode
))
7926 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7927 /* Thumb-2 allows only > -256 index range for it's core register
7928 load/stores. Since we allow SF/DF in core registers, we have
7929 to use the intersection between -256~4096 (core) and -1024~1024
7931 && INTVAL (index
) > -256
7932 && (INTVAL (index
) & 3) == 0);
7934 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7936 /* For DImode assume values will usually live in core regs
7937 and only allow LDRD addressing modes. */
7938 if (!TARGET_LDRD
|| mode
!= DImode
)
7939 return (code
== CONST_INT
7940 && INTVAL (index
) < 1024
7941 && INTVAL (index
) > -1024
7942 && (INTVAL (index
) & 3) == 0);
7945 /* For quad modes, we restrict the constant offset to be slightly less
7946 than what the instruction format permits. We do this because for
7947 quad mode moves, we will actually decompose them into two separate
7948 double-mode reads or writes. INDEX must therefore be a valid
7949 (double-mode) offset and so should INDEX+8. */
7950 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7951 return (code
== CONST_INT
7952 && INTVAL (index
) < 1016
7953 && INTVAL (index
) > -1024
7954 && (INTVAL (index
) & 3) == 0);
7956 /* We have no such constraint on double mode offsets, so we permit the
7957 full range of the instruction format. */
7958 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7959 return (code
== CONST_INT
7960 && INTVAL (index
) < 1024
7961 && INTVAL (index
) > -1024
7962 && (INTVAL (index
) & 3) == 0);
7964 if (arm_address_register_rtx_p (index
, strict_p
)
7965 && (GET_MODE_SIZE (mode
) <= 4))
7968 if (mode
== DImode
|| mode
== DFmode
)
7970 if (code
== CONST_INT
)
7972 HOST_WIDE_INT val
= INTVAL (index
);
7973 /* ??? Can we assume ldrd for thumb2? */
7974 /* Thumb-2 ldrd only has reg+const addressing modes. */
7975 /* ldrd supports offsets of +-1020.
7976 However the ldr fallback does not. */
7977 return val
> -256 && val
< 256 && (val
& 3) == 0;
7985 rtx xiop0
= XEXP (index
, 0);
7986 rtx xiop1
= XEXP (index
, 1);
7988 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7989 && thumb2_index_mul_operand (xiop1
))
7990 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7991 && thumb2_index_mul_operand (xiop0
)));
7993 else if (code
== ASHIFT
)
7995 rtx op
= XEXP (index
, 1);
7997 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8000 && INTVAL (op
) <= 3);
8003 return (code
== CONST_INT
8004 && INTVAL (index
) < 4096
8005 && INTVAL (index
) > -256);
8008 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8010 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8020 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8022 return (regno
<= LAST_LO_REGNUM
8023 || regno
> LAST_VIRTUAL_REGISTER
8024 || regno
== FRAME_POINTER_REGNUM
8025 || (GET_MODE_SIZE (mode
) >= 4
8026 && (regno
== STACK_POINTER_REGNUM
8027 || regno
>= FIRST_PSEUDO_REGISTER
8028 || x
== hard_frame_pointer_rtx
8029 || x
== arg_pointer_rtx
)));
8032 /* Return nonzero if x is a legitimate index register. This is the case
8033 for any base register that can access a QImode object. */
8035 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8037 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
8040 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8042 The AP may be eliminated to either the SP or the FP, so we use the
8043 least common denominator, e.g. SImode, and offsets from 0 to 64.
8045 ??? Verify whether the above is the right approach.
8047 ??? Also, the FP may be eliminated to the SP, so perhaps that
8048 needs special handling also.
8050 ??? Look at how the mips16 port solves this problem. It probably uses
8051 better ways to solve some of these problems.
8053 Although it is not incorrect, we don't accept QImode and HImode
8054 addresses based on the frame pointer or arg pointer until the
8055 reload pass starts. This is so that eliminating such addresses
8056 into stack based ones won't produce impossible code. */
8058 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8060 /* ??? Not clear if this is right. Experiment. */
8061 if (GET_MODE_SIZE (mode
) < 4
8062 && !(reload_in_progress
|| reload_completed
)
8063 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8064 || reg_mentioned_p (arg_pointer_rtx
, x
)
8065 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8066 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8067 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8068 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8071 /* Accept any base register. SP only in SImode or larger. */
8072 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8075 /* This is PC relative data before arm_reorg runs. */
8076 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
8077 && GET_CODE (x
) == SYMBOL_REF
8078 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
8081 /* This is PC relative data after arm_reorg runs. */
8082 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
8084 && (GET_CODE (x
) == LABEL_REF
8085 || (GET_CODE (x
) == CONST
8086 && GET_CODE (XEXP (x
, 0)) == PLUS
8087 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8088 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8091 /* Post-inc indexing only supported for SImode and larger. */
8092 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
8093 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
8096 else if (GET_CODE (x
) == PLUS
)
8098 /* REG+REG address can be any two index registers. */
8099 /* We disallow FRAME+REG addressing since we know that FRAME
8100 will be replaced with STACK, and SP relative addressing only
8101 permits SP+OFFSET. */
8102 if (GET_MODE_SIZE (mode
) <= 4
8103 && XEXP (x
, 0) != frame_pointer_rtx
8104 && XEXP (x
, 1) != frame_pointer_rtx
8105 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8106 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
8107 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
8110 /* REG+const has 5-7 bit offset for non-SP registers. */
8111 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8112 || XEXP (x
, 0) == arg_pointer_rtx
)
8113 && CONST_INT_P (XEXP (x
, 1))
8114 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8117 /* REG+const has 10-bit offset for SP, but only SImode and
8118 larger is supported. */
8119 /* ??? Should probably check for DI/DFmode overflow here
8120 just like GO_IF_LEGITIMATE_OFFSET does. */
8121 else if (REG_P (XEXP (x
, 0))
8122 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
8123 && GET_MODE_SIZE (mode
) >= 4
8124 && CONST_INT_P (XEXP (x
, 1))
8125 && INTVAL (XEXP (x
, 1)) >= 0
8126 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
8127 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8130 else if (REG_P (XEXP (x
, 0))
8131 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
8132 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
8133 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
8134 && REGNO (XEXP (x
, 0))
8135 <= LAST_VIRTUAL_POINTER_REGISTER
))
8136 && GET_MODE_SIZE (mode
) >= 4
8137 && CONST_INT_P (XEXP (x
, 1))
8138 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8142 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8143 && GET_MODE_SIZE (mode
) == 4
8144 && GET_CODE (x
) == SYMBOL_REF
8145 && CONSTANT_POOL_ADDRESS_P (x
)
8147 && symbol_mentioned_p (get_pool_constant (x
))
8148 && ! pcrel_constant_p (get_pool_constant (x
))))
8154 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8155 instruction of mode MODE. */
8157 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
8159 switch (GET_MODE_SIZE (mode
))
8162 return val
>= 0 && val
< 32;
8165 return val
>= 0 && val
< 64 && (val
& 1) == 0;
8169 && (val
+ GET_MODE_SIZE (mode
)) <= 128
8175 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
8178 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
8179 else if (TARGET_THUMB2
)
8180 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
8181 else /* if (TARGET_THUMB1) */
8182 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
8185 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8187 Given an rtx X being reloaded into a reg required to be
8188 in class CLASS, return the class of reg to actually use.
8189 In general this is just CLASS, but for the Thumb core registers and
8190 immediate constants we prefer a LO_REGS class or a subset. */
8193 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
8199 if (rclass
== GENERAL_REGS
)
8206 /* Build the SYMBOL_REF for __tls_get_addr. */
8208 static GTY(()) rtx tls_get_addr_libfunc
;
8211 get_tls_get_addr (void)
8213 if (!tls_get_addr_libfunc
)
8214 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
8215 return tls_get_addr_libfunc
;
8219 arm_load_tp (rtx target
)
8222 target
= gen_reg_rtx (SImode
);
8226 /* Can return in any reg. */
8227 emit_insn (gen_load_tp_hard (target
));
8231 /* Always returned in r0. Immediately copy the result into a pseudo,
8232 otherwise other uses of r0 (e.g. setting up function arguments) may
8233 clobber the value. */
8237 emit_insn (gen_load_tp_soft ());
8239 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
8240 emit_move_insn (target
, tmp
);
8246 load_tls_operand (rtx x
, rtx reg
)
8250 if (reg
== NULL_RTX
)
8251 reg
= gen_reg_rtx (SImode
);
8253 tmp
= gen_rtx_CONST (SImode
, x
);
8255 emit_move_insn (reg
, tmp
);
8261 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
8263 rtx label
, labelno
, sum
;
8265 gcc_assert (reloc
!= TLS_DESCSEQ
);
8268 labelno
= GEN_INT (pic_labelno
++);
8269 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8270 label
= gen_rtx_CONST (VOIDmode
, label
);
8272 sum
= gen_rtx_UNSPEC (Pmode
,
8273 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
8274 GEN_INT (TARGET_ARM
? 8 : 4)),
8276 reg
= load_tls_operand (sum
, reg
);
8279 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
8281 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8283 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
8284 LCT_PURE
, /* LCT_CONST? */
8285 Pmode
, 1, reg
, Pmode
);
8287 rtx_insn
*insns
= get_insns ();
8294 arm_tls_descseq_addr (rtx x
, rtx reg
)
8296 rtx labelno
= GEN_INT (pic_labelno
++);
8297 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8298 rtx sum
= gen_rtx_UNSPEC (Pmode
,
8299 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
8300 gen_rtx_CONST (VOIDmode
, label
),
8301 GEN_INT (!TARGET_ARM
)),
8303 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
8305 emit_insn (gen_tlscall (x
, labelno
));
8307 reg
= gen_reg_rtx (SImode
);
8309 gcc_assert (REGNO (reg
) != R0_REGNUM
);
8311 emit_move_insn (reg
, reg0
);
8317 legitimize_tls_address (rtx x
, rtx reg
)
8319 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
8321 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
8325 case TLS_MODEL_GLOBAL_DYNAMIC
:
8326 if (TARGET_GNU2_TLS
)
8328 reg
= arm_tls_descseq_addr (x
, reg
);
8330 tp
= arm_load_tp (NULL_RTX
);
8332 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8336 /* Original scheme */
8337 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
8338 dest
= gen_reg_rtx (Pmode
);
8339 emit_libcall_block (insns
, dest
, ret
, x
);
8343 case TLS_MODEL_LOCAL_DYNAMIC
:
8344 if (TARGET_GNU2_TLS
)
8346 reg
= arm_tls_descseq_addr (x
, reg
);
8348 tp
= arm_load_tp (NULL_RTX
);
8350 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8354 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
8356 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8357 share the LDM result with other LD model accesses. */
8358 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
8360 dest
= gen_reg_rtx (Pmode
);
8361 emit_libcall_block (insns
, dest
, ret
, eqv
);
8363 /* Load the addend. */
8364 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
8365 GEN_INT (TLS_LDO32
)),
8367 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
8368 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
8372 case TLS_MODEL_INITIAL_EXEC
:
8373 labelno
= GEN_INT (pic_labelno
++);
8374 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8375 label
= gen_rtx_CONST (VOIDmode
, label
);
8376 sum
= gen_rtx_UNSPEC (Pmode
,
8377 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
8378 GEN_INT (TARGET_ARM
? 8 : 4)),
8380 reg
= load_tls_operand (sum
, reg
);
8383 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
8384 else if (TARGET_THUMB2
)
8385 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
8388 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8389 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8392 tp
= arm_load_tp (NULL_RTX
);
8394 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8396 case TLS_MODEL_LOCAL_EXEC
:
8397 tp
= arm_load_tp (NULL_RTX
);
8399 reg
= gen_rtx_UNSPEC (Pmode
,
8400 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8402 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8404 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8411 /* Try machine-dependent ways of modifying an illegitimate address
8412 to be legitimate. If we find one, return the new, valid address. */
8414 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8416 if (arm_tls_referenced_p (x
))
8420 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8422 addend
= XEXP (XEXP (x
, 0), 1);
8423 x
= XEXP (XEXP (x
, 0), 0);
8426 if (GET_CODE (x
) != SYMBOL_REF
)
8429 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8431 x
= legitimize_tls_address (x
, NULL_RTX
);
8435 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8444 /* TODO: legitimize_address for Thumb2. */
8447 return thumb_legitimize_address (x
, orig_x
, mode
);
8450 if (GET_CODE (x
) == PLUS
)
8452 rtx xop0
= XEXP (x
, 0);
8453 rtx xop1
= XEXP (x
, 1);
8455 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8456 xop0
= force_reg (SImode
, xop0
);
8458 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8459 && !symbol_mentioned_p (xop1
))
8460 xop1
= force_reg (SImode
, xop1
);
8462 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8463 && CONST_INT_P (xop1
))
8465 HOST_WIDE_INT n
, low_n
;
8469 /* VFP addressing modes actually allow greater offsets, but for
8470 now we just stick with the lowest common denominator. */
8471 if (mode
== DImode
|| mode
== DFmode
)
8483 low_n
= ((mode
) == TImode
? 0
8484 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8488 base_reg
= gen_reg_rtx (SImode
);
8489 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8490 emit_move_insn (base_reg
, val
);
8491 x
= plus_constant (Pmode
, base_reg
, low_n
);
8493 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8494 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8497 /* XXX We don't allow MINUS any more -- see comment in
8498 arm_legitimate_address_outer_p (). */
8499 else if (GET_CODE (x
) == MINUS
)
8501 rtx xop0
= XEXP (x
, 0);
8502 rtx xop1
= XEXP (x
, 1);
8504 if (CONSTANT_P (xop0
))
8505 xop0
= force_reg (SImode
, xop0
);
8507 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8508 xop1
= force_reg (SImode
, xop1
);
8510 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8511 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8514 /* Make sure to take full advantage of the pre-indexed addressing mode
8515 with absolute addresses which often allows for the base register to
8516 be factorized for multiple adjacent memory references, and it might
8517 even allows for the mini pool to be avoided entirely. */
8518 else if (CONST_INT_P (x
) && optimize
> 0)
8521 HOST_WIDE_INT mask
, base
, index
;
8524 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8525 use a 8-bit index. So let's use a 12-bit index for SImode only and
8526 hope that arm_gen_constant will enable ldrb to use more bits. */
8527 bits
= (mode
== SImode
) ? 12 : 8;
8528 mask
= (1 << bits
) - 1;
8529 base
= INTVAL (x
) & ~mask
;
8530 index
= INTVAL (x
) & mask
;
8531 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8533 /* It'll most probably be more efficient to generate the base
8534 with more bits set and use a negative index instead. */
8538 base_reg
= force_reg (SImode
, GEN_INT (base
));
8539 x
= plus_constant (Pmode
, base_reg
, index
);
8544 /* We need to find and carefully transform any SYMBOL and LABEL
8545 references; so go back to the original address expression. */
8546 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8548 if (new_x
!= orig_x
)
8556 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8557 to be legitimate. If we find one, return the new, valid address. */
8559 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8561 if (GET_CODE (x
) == PLUS
8562 && CONST_INT_P (XEXP (x
, 1))
8563 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8564 || INTVAL (XEXP (x
, 1)) < 0))
8566 rtx xop0
= XEXP (x
, 0);
8567 rtx xop1
= XEXP (x
, 1);
8568 HOST_WIDE_INT offset
= INTVAL (xop1
);
8570 /* Try and fold the offset into a biasing of the base register and
8571 then offsetting that. Don't do this when optimizing for space
8572 since it can cause too many CSEs. */
8573 if (optimize_size
&& offset
>= 0
8574 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8576 HOST_WIDE_INT delta
;
8579 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8580 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8581 delta
= 31 * GET_MODE_SIZE (mode
);
8583 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8585 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8587 x
= plus_constant (Pmode
, xop0
, delta
);
8589 else if (offset
< 0 && offset
> -256)
8590 /* Small negative offsets are best done with a subtract before the
8591 dereference, forcing these into a register normally takes two
8593 x
= force_operand (x
, NULL_RTX
);
8596 /* For the remaining cases, force the constant into a register. */
8597 xop1
= force_reg (SImode
, xop1
);
8598 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8601 else if (GET_CODE (x
) == PLUS
8602 && s_register_operand (XEXP (x
, 1), SImode
)
8603 && !s_register_operand (XEXP (x
, 0), SImode
))
8605 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8607 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8612 /* We need to find and carefully transform any SYMBOL and LABEL
8613 references; so go back to the original address expression. */
8614 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8616 if (new_x
!= orig_x
)
8623 /* Return TRUE if X contains any TLS symbol references. */
8626 arm_tls_referenced_p (rtx x
)
8628 if (! TARGET_HAVE_TLS
)
8631 subrtx_iterator::array_type array
;
8632 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8634 const_rtx x
= *iter
;
8635 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8638 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8639 TLS offsets, not real symbol references. */
8640 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8641 iter
.skip_subrtxes ();
8646 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8648 On the ARM, allow any integer (invalid ones are removed later by insn
8649 patterns), nice doubles and symbol_refs which refer to the function's
8652 When generating pic allow anything. */
8655 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8657 return flag_pic
|| !label_mentioned_p (x
);
8661 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8663 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8664 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8665 for ARMv8-M Baseline or later the result is valid. */
8666 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8669 return (CONST_INT_P (x
)
8670 || CONST_DOUBLE_P (x
)
8671 || CONSTANT_ADDRESS_P (x
)
8676 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8678 return (!arm_cannot_force_const_mem (mode
, x
)
8680 ? arm_legitimate_constant_p_1 (mode
, x
)
8681 : thumb_legitimate_constant_p (mode
, x
)));
8684 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8687 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8691 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8693 split_const (x
, &base
, &offset
);
8694 if (GET_CODE (base
) == SYMBOL_REF
8695 && !offset_within_block_p (base
, INTVAL (offset
)))
8698 return arm_tls_referenced_p (x
);
/* Nonzero if X is a hard reg, or a SUBREG of one.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* The register contained in X, looking through a SUBREG if needed.  */
#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
8709 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8711 machine_mode mode
= GET_MODE (x
);
8720 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8727 return COSTS_N_INSNS (1);
8730 if (CONST_INT_P (XEXP (x
, 1)))
8733 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8740 return COSTS_N_INSNS (2) + cycles
;
8742 return COSTS_N_INSNS (1) + 16;
8745 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8747 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8748 return (COSTS_N_INSNS (words
)
8749 + 4 * ((MEM_P (SET_SRC (x
)))
8750 + MEM_P (SET_DEST (x
))));
8755 if (UINTVAL (x
) < 256
8756 /* 16-bit constant. */
8757 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8759 if (thumb_shiftable_const (INTVAL (x
)))
8760 return COSTS_N_INSNS (2);
8761 return COSTS_N_INSNS (3);
8763 else if ((outer
== PLUS
|| outer
== COMPARE
)
8764 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8766 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8767 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8768 return COSTS_N_INSNS (1);
8769 else if (outer
== AND
)
8772 /* This duplicates the tests in the andsi3 expander. */
8773 for (i
= 9; i
<= 31; i
++)
8774 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8775 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8776 return COSTS_N_INSNS (2);
8778 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8779 || outer
== LSHIFTRT
)
8781 return COSTS_N_INSNS (2);
8787 return COSTS_N_INSNS (3);
8805 /* XXX another guess. */
8806 /* Memory costs quite a lot for the first word, but subsequent words
8807 load at the equivalent of a single insn each. */
8808 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8809 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8814 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8820 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8821 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8827 return total
+ COSTS_N_INSNS (1);
8829 /* Assume a two-shift sequence. Increase the cost slightly so
8830 we prefer actual shifts over an extend operation. */
8831 return total
+ 1 + COSTS_N_INSNS (2);
8838 /* Estimates the size cost of thumb1 instructions.
8839 For now most of the code is copied from thumb1_rtx_costs. We need more
8840 fine grain tuning when we have more related test cases. */
8842 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8844 machine_mode mode
= GET_MODE (x
);
8853 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8857 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8858 defined by RTL expansion, especially for the expansion of
8860 if ((GET_CODE (XEXP (x
, 0)) == MULT
8861 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8862 || (GET_CODE (XEXP (x
, 1)) == MULT
8863 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8864 return COSTS_N_INSNS (2);
8869 return COSTS_N_INSNS (1);
8872 if (CONST_INT_P (XEXP (x
, 1)))
8874 /* Thumb1 mul instruction can't operate on const. We must Load it
8875 into a register first. */
8876 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8877 /* For the targets which have a very small and high-latency multiply
8878 unit, we prefer to synthesize the mult with up to 5 instructions,
8879 giving a good balance between size and performance. */
8880 if (arm_arch6m
&& arm_m_profile_small_mul
)
8881 return COSTS_N_INSNS (5);
8883 return COSTS_N_INSNS (1) + const_size
;
8885 return COSTS_N_INSNS (1);
8888 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8890 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8891 cost
= COSTS_N_INSNS (words
);
8892 if (satisfies_constraint_J (SET_SRC (x
))
8893 || satisfies_constraint_K (SET_SRC (x
))
8894 /* Too big an immediate for a 2-byte mov, using MOVT. */
8895 || (CONST_INT_P (SET_SRC (x
))
8896 && UINTVAL (SET_SRC (x
)) >= 256
8898 && satisfies_constraint_j (SET_SRC (x
)))
8899 /* thumb1_movdi_insn. */
8900 || ((words
> 1) && MEM_P (SET_SRC (x
))))
8901 cost
+= COSTS_N_INSNS (1);
8907 if (UINTVAL (x
) < 256)
8908 return COSTS_N_INSNS (1);
8909 /* movw is 4byte long. */
8910 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
8911 return COSTS_N_INSNS (2);
8912 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8913 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8914 return COSTS_N_INSNS (2);
8915 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8916 if (thumb_shiftable_const (INTVAL (x
)))
8917 return COSTS_N_INSNS (2);
8918 return COSTS_N_INSNS (3);
8920 else if ((outer
== PLUS
|| outer
== COMPARE
)
8921 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8923 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8924 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8925 return COSTS_N_INSNS (1);
8926 else if (outer
== AND
)
8929 /* This duplicates the tests in the andsi3 expander. */
8930 for (i
= 9; i
<= 31; i
++)
8931 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8932 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8933 return COSTS_N_INSNS (2);
8935 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8936 || outer
== LSHIFTRT
)
8938 return COSTS_N_INSNS (2);
8944 return COSTS_N_INSNS (3);
8958 return COSTS_N_INSNS (1);
8961 return (COSTS_N_INSNS (1)
8963 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8964 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8965 ? COSTS_N_INSNS (1) : 0));
8969 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8974 /* XXX still guessing. */
8975 switch (GET_MODE (XEXP (x
, 0)))
8978 return (1 + (mode
== DImode
? 4 : 0)
8979 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8982 return (4 + (mode
== DImode
? 4 : 0)
8983 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8986 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8997 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8998 operand, then return the operand that is being shifted. If the shift
8999 is not by a constant, then set SHIFT_REG to point to the operand.
9000 Return NULL if OP is not a shifter operand. */
9002 shifter_op_p (rtx op
, rtx
*shift_reg
)
9004 enum rtx_code code
= GET_CODE (op
);
9006 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9007 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9008 return XEXP (op
, 0);
9009 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9010 return XEXP (op
, 0);
9011 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9012 || code
== ASHIFTRT
)
9014 if (!CONST_INT_P (XEXP (op
, 1)))
9015 *shift_reg
= XEXP (op
, 1);
9016 return XEXP (op
, 0);
9023 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9025 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9026 rtx_code code
= GET_CODE (x
);
9027 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9029 switch (XINT (x
, 1))
9031 case UNSPEC_UNALIGNED_LOAD
:
9032 /* We can only do unaligned loads into the integer unit, and we can't
9034 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9036 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9037 + extra_cost
->ldst
.load_unaligned
);
9040 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9041 ADDR_SPACE_GENERIC
, speed_p
);
9045 case UNSPEC_UNALIGNED_STORE
:
9046 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9048 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9049 + extra_cost
->ldst
.store_unaligned
);
9051 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9053 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9054 ADDR_SPACE_GENERIC
, speed_p
);
9065 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9069 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

/* Cost a shift-and-arith operation whose shifted operand is XEXP (x, IDX);
   relies on SHIFT_OP, SHIFT_REG, COST, EXTRA_COST and SPEED_P from the
   enclosing scope, and returns from the enclosing function on a match.  */
/* NOTE(review): reconstructed from a line-mangled dump; verify against
   upstream GCC arm.c before committing.  */
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
9106 /* RTX costs. Make an estimate of the cost of executing the operation
9107 X, which is contained with an operation with code OUTER_CODE.
9108 SPEED_P indicates whether the cost desired is the performance cost,
9109 or the size cost. The estimate is stored in COST and the return
9110 value is TRUE if the cost calculation is final, or FALSE if the
9111 caller should recurse through the operands of X to add additional costs.
9114 We currently make no attempt to model the size savings of Thumb-2
9115 16-bit instructions. At the normal points in compilation where
9116 this code is called we have no measure of whether the condition
9117 flags are live or not, and thus no realistic way to determine what
9118 the size will eventually be. */
9120 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9121 const struct cpu_cost_table
*extra_cost
,
9122 int *cost
, bool speed_p
)
9124 machine_mode mode
= GET_MODE (x
);
9126 *cost
= COSTS_N_INSNS (1);
9131 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9133 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9141 /* SET RTXs don't have a mode so we get it from the destination. */
9142 mode
= GET_MODE (SET_DEST (x
));
9144 if (REG_P (SET_SRC (x
))
9145 && REG_P (SET_DEST (x
)))
9147 /* Assume that most copies can be done with a single insn,
9148 unless we don't have HW FP, in which case everything
9149 larger than word mode will require two insns. */
9150 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9151 && GET_MODE_SIZE (mode
) > 4)
9154 /* Conditional register moves can be encoded
9155 in 16 bits in Thumb mode. */
9156 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9162 if (CONST_INT_P (SET_SRC (x
)))
9164 /* Handle CONST_INT here, since the value doesn't have a mode
9165 and we would otherwise be unable to work out the true cost. */
9166 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9169 /* Slightly lower the cost of setting a core reg to a constant.
9170 This helps break up chains and allows for better scheduling. */
9171 if (REG_P (SET_DEST (x
))
9172 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9175 /* Immediate moves with an immediate in the range [0, 255] can be
9176 encoded in 16 bits in Thumb mode. */
9177 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9178 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9180 goto const_int_cost
;
9186 /* A memory access costs 1 insn if the mode is small, or the address is
9187 a single register, otherwise it costs one insn per word. */
9188 if (REG_P (XEXP (x
, 0)))
9189 *cost
= COSTS_N_INSNS (1);
9191 && GET_CODE (XEXP (x
, 0)) == PLUS
9192 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9193 /* This will be split into two instructions.
9194 See arm.md:calculate_pic_address. */
9195 *cost
= COSTS_N_INSNS (2);
9197 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9199 /* For speed optimizations, add the costs of the address and
9200 accessing memory. */
9203 *cost
+= (extra_cost
->ldst
.load
9204 + arm_address_cost (XEXP (x
, 0), mode
,
9205 ADDR_SPACE_GENERIC
, speed_p
));
9207 *cost
+= extra_cost
->ldst
.load
;
9213 /* Calculations of LDM costs are complex. We assume an initial cost
9214 (ldm_1st) which will load the number of registers mentioned in
9215 ldm_regs_per_insn_1st registers; then each additional
9216 ldm_regs_per_insn_subsequent registers cost one more insn. The
9217 formula for N regs is thus:
9219 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9220 + ldm_regs_per_insn_subsequent - 1)
9221 / ldm_regs_per_insn_subsequent).
9223 Additional costs may also be added for addressing. A similar
9224 formula is used for STM. */
9226 bool is_ldm
= load_multiple_operation (x
, SImode
);
9227 bool is_stm
= store_multiple_operation (x
, SImode
);
9229 if (is_ldm
|| is_stm
)
9233 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9234 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9235 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9236 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9237 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9238 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9239 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9241 *cost
+= regs_per_insn_1st
9242 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9243 + regs_per_insn_sub
- 1)
9244 / regs_per_insn_sub
);
9253 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9254 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9255 *cost
+= COSTS_N_INSNS (speed_p
9256 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9257 else if (mode
== SImode
&& TARGET_IDIV
)
9258 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9260 *cost
= LIBCALL_COST (2);
9261 return false; /* All arguments must be in registers. */
9264 /* MOD by a power of 2 can be expanded as:
9266 and r0, r0, #(n - 1)
9267 and r1, r1, #(n - 1)
9268 rsbpl r0, r1, #0. */
9269 if (CONST_INT_P (XEXP (x
, 1))
9270 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9273 *cost
+= COSTS_N_INSNS (3);
9276 *cost
+= 2 * extra_cost
->alu
.logical
9277 + extra_cost
->alu
.arith
;
9283 *cost
= LIBCALL_COST (2);
9284 return false; /* All arguments must be in registers. */
9287 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9289 *cost
+= (COSTS_N_INSNS (1)
9290 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9292 *cost
+= extra_cost
->alu
.shift_reg
;
9300 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9302 *cost
+= (COSTS_N_INSNS (2)
9303 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9305 *cost
+= 2 * extra_cost
->alu
.shift
;
9308 else if (mode
== SImode
)
9310 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9311 /* Slightly disparage register shifts at -Os, but not by much. */
9312 if (!CONST_INT_P (XEXP (x
, 1)))
9313 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9314 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9317 else if (GET_MODE_CLASS (mode
) == MODE_INT
9318 && GET_MODE_SIZE (mode
) < 4)
9322 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9323 /* Slightly disparage register shifts at -Os, but not by much.  */
9325 if (!CONST_INT_P (XEXP (x
, 1)))
9326 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9327 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9329 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9331 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9333 /* Can use SBFX/UBFX. */
9335 *cost
+= extra_cost
->alu
.bfx
;
9336 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9340 *cost
+= COSTS_N_INSNS (1);
9341 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9344 if (CONST_INT_P (XEXP (x
, 1)))
9345 *cost
+= 2 * extra_cost
->alu
.shift
;
9347 *cost
+= (extra_cost
->alu
.shift
9348 + extra_cost
->alu
.shift_reg
);
9351 /* Slightly disparage register shifts. */
9352 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9357 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9358 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9361 if (CONST_INT_P (XEXP (x
, 1)))
9362 *cost
+= (2 * extra_cost
->alu
.shift
9363 + extra_cost
->alu
.log_shift
);
9365 *cost
+= (extra_cost
->alu
.shift
9366 + extra_cost
->alu
.shift_reg
9367 + extra_cost
->alu
.log_shift_reg
);
9373 *cost
= LIBCALL_COST (2);
9382 *cost
+= extra_cost
->alu
.rev
;
9389 /* No rev instruction available. Look at arm_legacy_rev
9390 and thumb_legacy_rev for the form of RTL used then. */
9393 *cost
+= COSTS_N_INSNS (9);
9397 *cost
+= 6 * extra_cost
->alu
.shift
;
9398 *cost
+= 3 * extra_cost
->alu
.logical
;
9403 *cost
+= COSTS_N_INSNS (4);
9407 *cost
+= 2 * extra_cost
->alu
.shift
;
9408 *cost
+= extra_cost
->alu
.arith_shift
;
9409 *cost
+= 2 * extra_cost
->alu
.logical
;
9417 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9418 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9420 if (GET_CODE (XEXP (x
, 0)) == MULT
9421 || GET_CODE (XEXP (x
, 1)) == MULT
)
9423 rtx mul_op0
, mul_op1
, sub_op
;
9426 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9428 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9430 mul_op0
= XEXP (XEXP (x
, 0), 0);
9431 mul_op1
= XEXP (XEXP (x
, 0), 1);
9432 sub_op
= XEXP (x
, 1);
9436 mul_op0
= XEXP (XEXP (x
, 1), 0);
9437 mul_op1
= XEXP (XEXP (x
, 1), 1);
9438 sub_op
= XEXP (x
, 0);
9441 /* The first operand of the multiply may be optionally
9443 if (GET_CODE (mul_op0
) == NEG
)
9444 mul_op0
= XEXP (mul_op0
, 0);
9446 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9447 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9448 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9454 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9460 rtx shift_by_reg
= NULL
;
9464 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9465 if (shift_op
== NULL
)
9467 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9468 non_shift_op
= XEXP (x
, 0);
9471 non_shift_op
= XEXP (x
, 1);
9473 if (shift_op
!= NULL
)
9475 if (shift_by_reg
!= NULL
)
9478 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9479 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9482 *cost
+= extra_cost
->alu
.arith_shift
;
9484 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9485 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9490 && GET_CODE (XEXP (x
, 1)) == MULT
)
9494 *cost
+= extra_cost
->mult
[0].add
;
9495 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9496 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9497 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9501 if (CONST_INT_P (XEXP (x
, 0)))
9503 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9504 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9506 *cost
= COSTS_N_INSNS (insns
);
9508 *cost
+= insns
* extra_cost
->alu
.arith
;
9509 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9513 *cost
+= extra_cost
->alu
.arith
;
9518 if (GET_MODE_CLASS (mode
) == MODE_INT
9519 && GET_MODE_SIZE (mode
) < 4)
9521 rtx shift_op
, shift_reg
;
9524 /* We check both sides of the MINUS for shifter operands since,
9525 unlike PLUS, it's not commutative. */
9527 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9528 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9530 /* Slightly disparage, as we might need to widen the result. */
9533 *cost
+= extra_cost
->alu
.arith
;
9535 if (CONST_INT_P (XEXP (x
, 0)))
9537 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9546 *cost
+= COSTS_N_INSNS (1);
9548 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9550 rtx op1
= XEXP (x
, 1);
9553 *cost
+= 2 * extra_cost
->alu
.arith
;
9555 if (GET_CODE (op1
) == ZERO_EXTEND
)
9556 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9559 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9560 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9564 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9567 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9568 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9570 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9573 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9574 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9577 *cost
+= (extra_cost
->alu
.arith
9578 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9579 ? extra_cost
->alu
.arith
9580 : extra_cost
->alu
.arith_shift
));
9581 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9582 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9583 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9588 *cost
+= 2 * extra_cost
->alu
.arith
;
9594 *cost
= LIBCALL_COST (2);
9598 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9599 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9601 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9603 rtx mul_op0
, mul_op1
, add_op
;
9606 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9608 mul_op0
= XEXP (XEXP (x
, 0), 0);
9609 mul_op1
= XEXP (XEXP (x
, 0), 1);
9610 add_op
= XEXP (x
, 1);
9612 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9613 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9614 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9620 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9623 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9625 *cost
= LIBCALL_COST (2);
9629 /* Narrow modes can be synthesized in SImode, but the range
9630 of useful sub-operations is limited. Check for shift operations
9631 on one of the operands. Only left shifts can be used in the narrow modes.  */
9633 if (GET_MODE_CLASS (mode
) == MODE_INT
9634 && GET_MODE_SIZE (mode
) < 4)
9636 rtx shift_op
, shift_reg
;
9639 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9641 if (CONST_INT_P (XEXP (x
, 1)))
9643 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9644 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9646 *cost
= COSTS_N_INSNS (insns
);
9648 *cost
+= insns
* extra_cost
->alu
.arith
;
9649 /* Slightly penalize a narrow operation as the result may need widening.  */
9651 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9655 /* Slightly penalize a narrow operation as the result may need widening.  */
9659 *cost
+= extra_cost
->alu
.arith
;
9666 rtx shift_op
, shift_reg
;
9669 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9670 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9672 /* UXTA[BH] or SXTA[BH]. */
9674 *cost
+= extra_cost
->alu
.extend_arith
;
9675 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9677 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9682 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9683 if (shift_op
!= NULL
)
9688 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9689 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9692 *cost
+= extra_cost
->alu
.arith_shift
;
9694 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9695 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9698 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9700 rtx mul_op
= XEXP (x
, 0);
9702 if (TARGET_DSP_MULTIPLY
9703 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9704 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9705 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9706 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9707 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9708 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9709 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9710 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9711 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9712 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9713 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9714 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9719 *cost
+= extra_cost
->mult
[0].extend_add
;
9720 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9721 SIGN_EXTEND
, 0, speed_p
)
9722 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9723 SIGN_EXTEND
, 0, speed_p
)
9724 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9729 *cost
+= extra_cost
->mult
[0].add
;
9730 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9731 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9732 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9735 if (CONST_INT_P (XEXP (x
, 1)))
9737 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9738 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9740 *cost
= COSTS_N_INSNS (insns
);
9742 *cost
+= insns
* extra_cost
->alu
.arith
;
9743 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9747 *cost
+= extra_cost
->alu
.arith
;
9755 && GET_CODE (XEXP (x
, 0)) == MULT
9756 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9757 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9758 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9759 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9762 *cost
+= extra_cost
->mult
[1].extend_add
;
9763 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
9764 ZERO_EXTEND
, 0, speed_p
)
9765 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
9766 ZERO_EXTEND
, 0, speed_p
)
9767 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9771 *cost
+= COSTS_N_INSNS (1);
9773 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9774 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9777 *cost
+= (extra_cost
->alu
.arith
9778 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9779 ? extra_cost
->alu
.arith
9780 : extra_cost
->alu
.arith_shift
));
9782 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9784 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9789 *cost
+= 2 * extra_cost
->alu
.arith
;
9794 *cost
= LIBCALL_COST (2);
9797 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9800 *cost
+= extra_cost
->alu
.rev
;
9808 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9809 rtx op0
= XEXP (x
, 0);
9810 rtx shift_op
, shift_reg
;
9814 || (code
== IOR
&& TARGET_THUMB2
)))
9815 op0
= XEXP (op0
, 0);
9818 shift_op
= shifter_op_p (op0
, &shift_reg
);
9819 if (shift_op
!= NULL
)
9824 *cost
+= extra_cost
->alu
.log_shift_reg
;
9825 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9828 *cost
+= extra_cost
->alu
.log_shift
;
9830 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9831 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9835 if (CONST_INT_P (XEXP (x
, 1)))
9837 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9838 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9841 *cost
= COSTS_N_INSNS (insns
);
9843 *cost
+= insns
* extra_cost
->alu
.logical
;
9844 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
9849 *cost
+= extra_cost
->alu
.logical
;
9850 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
9851 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9857 rtx op0
= XEXP (x
, 0);
9858 enum rtx_code subcode
= GET_CODE (op0
);
9860 *cost
+= COSTS_N_INSNS (1);
9864 || (code
== IOR
&& TARGET_THUMB2
)))
9865 op0
= XEXP (op0
, 0);
9867 if (GET_CODE (op0
) == ZERO_EXTEND
)
9870 *cost
+= 2 * extra_cost
->alu
.logical
;
9872 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
9874 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9877 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9880 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9882 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
9884 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9889 *cost
+= 2 * extra_cost
->alu
.logical
;
9895 *cost
= LIBCALL_COST (2);
9899 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9900 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9902 rtx op0
= XEXP (x
, 0);
9904 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
9905 op0
= XEXP (op0
, 0);
9908 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9910 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
9911 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
9914 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9916 *cost
= LIBCALL_COST (2);
9922 if (TARGET_DSP_MULTIPLY
9923 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9924 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9925 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9926 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9927 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9928 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
9929 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
9930 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
9931 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9932 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9933 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9934 && (INTVAL (XEXP (XEXP (x
, 1), 1))
9939 *cost
+= extra_cost
->mult
[0].extend
;
9940 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
9941 SIGN_EXTEND
, 0, speed_p
);
9942 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
9943 SIGN_EXTEND
, 1, speed_p
);
9947 *cost
+= extra_cost
->mult
[0].simple
;
9954 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9955 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
9956 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9957 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
9960 *cost
+= extra_cost
->mult
[1].extend
;
9961 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
9962 ZERO_EXTEND
, 0, speed_p
)
9963 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9964 ZERO_EXTEND
, 0, speed_p
));
9968 *cost
= LIBCALL_COST (2);
9973 *cost
= LIBCALL_COST (2);
9977 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9978 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9980 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9983 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
9988 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9992 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9994 *cost
= LIBCALL_COST (1);
10000 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10002 *cost
+= COSTS_N_INSNS (1);
10003 /* Assume the non-flag-changing variant. */
10005 *cost
+= (extra_cost
->alu
.log_shift
10006 + extra_cost
->alu
.arith_shift
);
10007 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10011 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10012 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10014 *cost
+= COSTS_N_INSNS (1);
10015 /* No extra cost for MOV imm and MVN imm. */
10016 /* If the comparison op is using the flags, there's no further
10017 cost, otherwise we need to add the cost of the comparison. */
10018 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10019 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10020 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10022 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10023 *cost
+= (COSTS_N_INSNS (1)
10024 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10026 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10029 *cost
+= extra_cost
->alu
.arith
;
10035 *cost
+= extra_cost
->alu
.arith
;
10039 if (GET_MODE_CLASS (mode
) == MODE_INT
10040 && GET_MODE_SIZE (mode
) < 4)
10042 /* Slightly disparage, as we might need an extend operation. */
10045 *cost
+= extra_cost
->alu
.arith
;
10049 if (mode
== DImode
)
10051 *cost
+= COSTS_N_INSNS (1);
10053 *cost
+= 2 * extra_cost
->alu
.arith
;
10058 *cost
= LIBCALL_COST (1);
10062 if (mode
== SImode
)
10065 rtx shift_reg
= NULL
;
10067 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10071 if (shift_reg
!= NULL
)
10074 *cost
+= extra_cost
->alu
.log_shift_reg
;
10075 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10078 *cost
+= extra_cost
->alu
.log_shift
;
10079 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10084 *cost
+= extra_cost
->alu
.logical
;
10087 if (mode
== DImode
)
10089 *cost
+= COSTS_N_INSNS (1);
10095 *cost
+= LIBCALL_COST (1);
10100 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10102 *cost
+= COSTS_N_INSNS (3);
10105 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10106 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10108 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10109 /* Assume that if one arm of the if_then_else is a register,
10110 that it will be tied with the result and eliminate the
10111 conditional insn. */
10112 if (REG_P (XEXP (x
, 1)))
10114 else if (REG_P (XEXP (x
, 2)))
10120 if (extra_cost
->alu
.non_exec_costs_exec
)
10121 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10123 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10126 *cost
+= op1cost
+ op2cost
;
10132 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10136 machine_mode op0mode
;
10137 /* We'll mostly assume that the cost of a compare is the cost of the
10138 LHS. However, there are some notable exceptions. */
10140 /* Floating point compares are never done as side-effects. */
10141 op0mode
= GET_MODE (XEXP (x
, 0));
10142 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10143 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10146 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10148 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10150 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10156 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10158 *cost
= LIBCALL_COST (2);
10162 /* DImode compares normally take two insns. */
10163 if (op0mode
== DImode
)
10165 *cost
+= COSTS_N_INSNS (1);
10167 *cost
+= 2 * extra_cost
->alu
.arith
;
10171 if (op0mode
== SImode
)
10176 if (XEXP (x
, 1) == const0_rtx
10177 && !(REG_P (XEXP (x
, 0))
10178 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10179 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10181 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10183 /* Multiply operations that set the flags are often
10184 significantly more expensive. */
10186 && GET_CODE (XEXP (x
, 0)) == MULT
10187 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10188 *cost
+= extra_cost
->mult
[0].flag_setting
;
10191 && GET_CODE (XEXP (x
, 0)) == PLUS
10192 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10193 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10195 *cost
+= extra_cost
->mult
[0].flag_setting
;
10200 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10201 if (shift_op
!= NULL
)
10203 if (shift_reg
!= NULL
)
10205 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10208 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10211 *cost
+= extra_cost
->alu
.arith_shift
;
10212 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10213 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10218 *cost
+= extra_cost
->alu
.arith
;
10219 if (CONST_INT_P (XEXP (x
, 1))
10220 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10222 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10230 *cost
= LIBCALL_COST (2);
10253 if (outer_code
== SET
)
10255 /* Is it a store-flag operation? */
10256 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10257 && XEXP (x
, 1) == const0_rtx
)
10259 /* Thumb also needs an IT insn. */
10260 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10263 if (XEXP (x
, 1) == const0_rtx
)
10268 /* LSR Rd, Rn, #31. */
10270 *cost
+= extra_cost
->alu
.shift
;
10280 *cost
+= COSTS_N_INSNS (1);
10284 /* RSBS T1, Rn, Rn, LSR #31
10286 *cost
+= COSTS_N_INSNS (1);
10288 *cost
+= extra_cost
->alu
.arith_shift
;
10292 /* RSB Rd, Rn, Rn, ASR #1
10293 LSR Rd, Rd, #31. */
10294 *cost
+= COSTS_N_INSNS (1);
10296 *cost
+= (extra_cost
->alu
.arith_shift
10297 + extra_cost
->alu
.shift
);
10303 *cost
+= COSTS_N_INSNS (1);
10305 *cost
+= extra_cost
->alu
.shift
;
10309 /* Remaining cases are either meaningless or would take
10310 three insns anyway. */
10311 *cost
= COSTS_N_INSNS (3);
10314 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10319 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10320 if (CONST_INT_P (XEXP (x
, 1))
10321 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10323 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10330 /* Not directly inside a set. If it involves the condition code
10331 register it must be the condition for a branch, cond_exec or
10332 I_T_E operation. Since the comparison is performed elsewhere
10333 this is just the control part which has no additional
10335 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10336 && XEXP (x
, 1) == const0_rtx
)
10344 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10345 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10348 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10352 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10354 *cost
= LIBCALL_COST (1);
10358 if (mode
== SImode
)
10361 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10365 *cost
= LIBCALL_COST (1);
10369 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10370 && MEM_P (XEXP (x
, 0)))
10372 if (mode
== DImode
)
10373 *cost
+= COSTS_N_INSNS (1);
10378 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10379 *cost
+= extra_cost
->ldst
.load
;
10381 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10383 if (mode
== DImode
)
10384 *cost
+= extra_cost
->alu
.shift
;
10389 /* Widening from less than 32-bits requires an extend operation. */
10390 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10392 /* We have SXTB/SXTH. */
10393 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10395 *cost
+= extra_cost
->alu
.extend
;
10397 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10399 /* Needs two shifts. */
10400 *cost
+= COSTS_N_INSNS (1);
10401 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10403 *cost
+= 2 * extra_cost
->alu
.shift
;
10406 /* Widening beyond 32-bits requires one more insn. */
10407 if (mode
== DImode
)
10409 *cost
+= COSTS_N_INSNS (1);
10411 *cost
+= extra_cost
->alu
.shift
;
10418 || GET_MODE (XEXP (x
, 0)) == SImode
10419 || GET_MODE (XEXP (x
, 0)) == QImode
)
10420 && MEM_P (XEXP (x
, 0)))
10422 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10424 if (mode
== DImode
)
10425 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10430 /* Widening from less than 32-bits requires an extend operation. */
10431 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10433 /* UXTB can be a shorter instruction in Thumb2, but it might
10434 be slower than the AND Rd, Rn, #255 alternative. When
10435 optimizing for speed it should never be slower to use
10436 AND, and we don't really model 16-bit vs 32-bit insns
10439 *cost
+= extra_cost
->alu
.logical
;
10441 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10443 /* We have UXTB/UXTH. */
10444 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10446 *cost
+= extra_cost
->alu
.extend
;
10448 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10450 /* Needs two shifts. It's marginally preferable to use
10451 shifts rather than two BIC instructions as the second
10452 shift may merge with a subsequent insn as a shifter
10454 *cost
= COSTS_N_INSNS (2);
10455 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10457 *cost
+= 2 * extra_cost
->alu
.shift
;
10460 /* Widening beyond 32-bits requires one more insn. */
10461 if (mode
== DImode
)
10463 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10470 /* CONST_INT has no mode, so we cannot tell for sure how many
10471 insns are really going to be needed. The best we can do is
10472 look at the value passed. If it fits in SImode, then assume
10473 that's the mode it will be used for. Otherwise assume it
10474 will be used in DImode. */
10475 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10480 /* Avoid blowing up in arm_gen_constant (). */
10481 if (!(outer_code
== PLUS
10482 || outer_code
== AND
10483 || outer_code
== IOR
10484 || outer_code
== XOR
10485 || outer_code
== MINUS
))
10489 if (mode
== SImode
)
10491 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10492 INTVAL (x
), NULL
, NULL
,
10498 *cost
+= COSTS_N_INSNS (arm_gen_constant
10499 (outer_code
, SImode
, NULL
,
10500 trunc_int_for_mode (INTVAL (x
), SImode
),
10502 + arm_gen_constant (outer_code
, SImode
, NULL
,
10503 INTVAL (x
) >> 32, NULL
,
10515 if (arm_arch_thumb2
&& !flag_pic
)
10516 *cost
+= COSTS_N_INSNS (1);
10518 *cost
+= extra_cost
->ldst
.load
;
10521 *cost
+= COSTS_N_INSNS (1);
10525 *cost
+= COSTS_N_INSNS (1);
10527 *cost
+= extra_cost
->alu
.arith
;
10533 *cost
= COSTS_N_INSNS (4);
10538 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10539 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10541 if (vfp3_const_double_rtx (x
))
10544 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10550 if (mode
== DFmode
)
10551 *cost
+= extra_cost
->ldst
.loadd
;
10553 *cost
+= extra_cost
->ldst
.loadf
;
10556 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10560 *cost
= COSTS_N_INSNS (4);
10566 && TARGET_HARD_FLOAT
10567 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10568 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10569 *cost
= COSTS_N_INSNS (1);
10571 *cost
= COSTS_N_INSNS (4);
10576 /* When optimizing for size, we prefer constant pool entries to
10577 MOVW/MOVT pairs, so bump the cost of these slightly. */
10584 *cost
+= extra_cost
->alu
.clz
;
10588 if (XEXP (x
, 1) == const0_rtx
)
10591 *cost
+= extra_cost
->alu
.log_shift
;
10592 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10595 /* Fall through. */
10599 *cost
+= COSTS_N_INSNS (1);
10603 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10604 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10605 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10606 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10607 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10608 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10609 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10610 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10614 *cost
+= extra_cost
->mult
[1].extend
;
10615 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10616 ZERO_EXTEND
, 0, speed_p
)
10617 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10618 ZERO_EXTEND
, 0, speed_p
));
10621 *cost
= LIBCALL_COST (1);
10624 case UNSPEC_VOLATILE
:
10626 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10629 /* Reading the PC is like reading any other register. Writing it
10630 is more expensive, but we take that into account elsewhere. */
10635 /* TODO: Simple zero_extract of bottom bits using AND. */
10636 /* Fall through. */
10640 && CONST_INT_P (XEXP (x
, 1))
10641 && CONST_INT_P (XEXP (x
, 2)))
10644 *cost
+= extra_cost
->alu
.bfx
;
10645 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10648 /* Without UBFX/SBFX, need to resort to shift operations. */
10649 *cost
+= COSTS_N_INSNS (1);
10651 *cost
+= 2 * extra_cost
->alu
.shift
;
10652 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10656 if (TARGET_HARD_FLOAT
)
10659 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10660 if (!TARGET_FPU_ARMV8
10661 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10663 /* Pre v8, widening HF->DF is a two-step process, first
10664 widening to SFmode. */
10665 *cost
+= COSTS_N_INSNS (1);
10667 *cost
+= extra_cost
->fp
[0].widen
;
10669 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10673 *cost
= LIBCALL_COST (1);
10676 case FLOAT_TRUNCATE
:
10677 if (TARGET_HARD_FLOAT
)
10680 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10681 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10683 /* Vector modes? */
10685 *cost
= LIBCALL_COST (1);
10689 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10691 rtx op0
= XEXP (x
, 0);
10692 rtx op1
= XEXP (x
, 1);
10693 rtx op2
= XEXP (x
, 2);
10696 /* vfms or vfnma. */
10697 if (GET_CODE (op0
) == NEG
)
10698 op0
= XEXP (op0
, 0);
10700 /* vfnms or vfnma. */
10701 if (GET_CODE (op2
) == NEG
)
10702 op2
= XEXP (op2
, 0);
10704 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
10705 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
10706 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
10709 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10714 *cost
= LIBCALL_COST (3);
10719 if (TARGET_HARD_FLOAT
)
10721 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10722 a vcvt fixed-point conversion. */
10723 if (code
== FIX
&& mode
== SImode
10724 && GET_CODE (XEXP (x
, 0)) == FIX
10725 && GET_MODE (XEXP (x
, 0)) == SFmode
10726 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10727 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10731 *cost
+= extra_cost
->fp
[0].toint
;
10733 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10738 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10740 mode
= GET_MODE (XEXP (x
, 0));
10742 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
10743 /* Strip of the 'cost' of rounding towards zero. */
10744 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10745 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
10748 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10749 /* ??? Increase the cost to deal with transferring from
10750 FP -> CORE registers? */
10753 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10754 && TARGET_FPU_ARMV8
)
10757 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10760 /* Vector costs? */
10762 *cost
= LIBCALL_COST (1);
10766 case UNSIGNED_FLOAT
:
10767 if (TARGET_HARD_FLOAT
)
10769 /* ??? Increase the cost to deal with transferring from CORE
10770 -> FP registers? */
10772 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10775 *cost
= LIBCALL_COST (1);
10783 /* Just a guess. Guess number of instructions in the asm
10784 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10785 though (see PR60663). */
10786 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10787 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10789 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10793 if (mode
!= VOIDmode
)
10794 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10796 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10801 #undef HANDLE_NARROW_SHIFT_ARITH
10803 /* RTX costs entry point. */
10806 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
10807 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
10810 int code
= GET_CODE (x
);
10811 gcc_assert (current_tune
->insn_extra_cost
);
10813 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
10814 (enum rtx_code
) outer_code
,
10815 current_tune
->insn_extra_cost
,
10818 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10820 print_rtl_single (dump_file
, x
);
10821 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10822 *total
, result
? "final" : "partial");
10827 /* All address computations that can be done are free, but rtx cost returns
10828 the same for practically all of them. So we weight the different types
10829 of address here in the order (most pref first):
10830 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10832 arm_arm_address_cost (rtx x
)
10834 enum rtx_code c
= GET_CODE (x
);
10836 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
10838 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
10843 if (CONST_INT_P (XEXP (x
, 1)))
10846 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
10856 arm_thumb_address_cost (rtx x
)
10858 enum rtx_code c
= GET_CODE (x
);
10863 && REG_P (XEXP (x
, 0))
10864 && CONST_INT_P (XEXP (x
, 1)))
10871 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
10872 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
10874 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
10877 /* Adjust cost hook for XScale. */
10879 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10882 /* Some true dependencies can have a higher cost depending
10883 on precisely how certain input operands are used. */
10885 && recog_memoized (insn
) >= 0
10886 && recog_memoized (dep
) >= 0)
10888 int shift_opnum
= get_attr_shift (insn
);
10889 enum attr_type attr_type
= get_attr_type (dep
);
10891 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10892 operand for INSN. If we have a shifted input operand and the
10893 instruction we depend on is another ALU instruction, then we may
10894 have to account for an additional stall. */
10895 if (shift_opnum
!= 0
10896 && (attr_type
== TYPE_ALU_SHIFT_IMM
10897 || attr_type
== TYPE_ALUS_SHIFT_IMM
10898 || attr_type
== TYPE_LOGIC_SHIFT_IMM
10899 || attr_type
== TYPE_LOGICS_SHIFT_IMM
10900 || attr_type
== TYPE_ALU_SHIFT_REG
10901 || attr_type
== TYPE_ALUS_SHIFT_REG
10902 || attr_type
== TYPE_LOGIC_SHIFT_REG
10903 || attr_type
== TYPE_LOGICS_SHIFT_REG
10904 || attr_type
== TYPE_MOV_SHIFT
10905 || attr_type
== TYPE_MVN_SHIFT
10906 || attr_type
== TYPE_MOV_SHIFT_REG
10907 || attr_type
== TYPE_MVN_SHIFT_REG
))
10909 rtx shifted_operand
;
10912 /* Get the shifted operand. */
10913 extract_insn (insn
);
10914 shifted_operand
= recog_data
.operand
[shift_opnum
];
10916 /* Iterate over all the operands in DEP. If we write an operand
10917 that overlaps with SHIFTED_OPERAND, then we have increase the
10918 cost of this dependency. */
10919 extract_insn (dep
);
10920 preprocess_constraints (dep
);
10921 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
10923 /* We can ignore strict inputs. */
10924 if (recog_data
.operand_type
[opno
] == OP_IN
)
10927 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
10939 /* Adjust cost hook for Cortex A9. */
10941 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10951 case REG_DEP_OUTPUT
:
10952 if (recog_memoized (insn
) >= 0
10953 && recog_memoized (dep
) >= 0)
10955 if (GET_CODE (PATTERN (insn
)) == SET
)
10958 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
10960 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
10962 enum attr_type attr_type_insn
= get_attr_type (insn
);
10963 enum attr_type attr_type_dep
= get_attr_type (dep
);
10965 /* By default all dependencies of the form
10968 have an extra latency of 1 cycle because
10969 of the input and output dependency in this
10970 case. However this gets modeled as an true
10971 dependency and hence all these checks. */
10972 if (REG_P (SET_DEST (PATTERN (insn
)))
10973 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
10975 /* FMACS is a special case where the dependent
10976 instruction can be issued 3 cycles before
10977 the normal latency in case of an output
10979 if ((attr_type_insn
== TYPE_FMACS
10980 || attr_type_insn
== TYPE_FMACD
)
10981 && (attr_type_dep
== TYPE_FMACS
10982 || attr_type_dep
== TYPE_FMACD
))
10984 if (dep_type
== REG_DEP_OUTPUT
)
10985 *cost
= insn_default_latency (dep
) - 3;
10987 *cost
= insn_default_latency (dep
);
10992 if (dep_type
== REG_DEP_OUTPUT
)
10993 *cost
= insn_default_latency (dep
) + 1;
10995 *cost
= insn_default_latency (dep
);
11005 gcc_unreachable ();
11011 /* Adjust cost hook for FA726TE. */
11013 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11016 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11017 have penalty of 3. */
11018 if (dep_type
== REG_DEP_TRUE
11019 && recog_memoized (insn
) >= 0
11020 && recog_memoized (dep
) >= 0
11021 && get_attr_conds (dep
) == CONDS_SET
)
11023 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11024 if (get_attr_conds (insn
) == CONDS_USE
11025 && get_attr_type (insn
) != TYPE_BRANCH
)
11031 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11032 || get_attr_conds (insn
) == CONDS_USE
)
11042 /* Implement TARGET_REGISTER_MOVE_COST.
11044 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11045 it is typically more expensive than a single memory access. We set
11046 the cost to less than two memory accesses so that floating
11047 point to integer conversion does not go through memory. */
11050 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11051 reg_class_t from
, reg_class_t to
)
11055 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11056 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11058 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11059 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11061 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11068 if (from
== HI_REGS
|| to
== HI_REGS
)
11075 /* Implement TARGET_MEMORY_MOVE_COST. */
11078 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11079 bool in ATTRIBUTE_UNUSED
)
11085 if (GET_MODE_SIZE (mode
) < 4)
11088 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11092 /* Vectorizer cost model implementation. */
11094 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11096 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11098 int misalign ATTRIBUTE_UNUSED
)
11102 switch (type_of_cost
)
11105 return current_tune
->vec_costs
->scalar_stmt_cost
;
11108 return current_tune
->vec_costs
->scalar_load_cost
;
11111 return current_tune
->vec_costs
->scalar_store_cost
;
11114 return current_tune
->vec_costs
->vec_stmt_cost
;
11117 return current_tune
->vec_costs
->vec_align_load_cost
;
11120 return current_tune
->vec_costs
->vec_store_cost
;
11122 case vec_to_scalar
:
11123 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11125 case scalar_to_vec
:
11126 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11128 case unaligned_load
:
11129 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11131 case unaligned_store
:
11132 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11134 case cond_branch_taken
:
11135 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11137 case cond_branch_not_taken
:
11138 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11141 case vec_promote_demote
:
11142 return current_tune
->vec_costs
->vec_stmt_cost
;
11144 case vec_construct
:
11145 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11146 return elements
/ 2 + 1;
11149 gcc_unreachable ();
11153 /* Implement targetm.vectorize.add_stmt_cost. */
11156 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11157 struct _stmt_vec_info
*stmt_info
, int misalign
,
11158 enum vect_cost_model_location where
)
11160 unsigned *cost
= (unsigned *) data
;
11161 unsigned retval
= 0;
11163 if (flag_vect_cost_model
)
11165 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11166 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11168 /* Statements in an inner loop relative to the loop being
11169 vectorized are weighted more heavily. The value here is
11170 arbitrary and could potentially be improved with analysis. */
11171 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11172 count
*= 50; /* FIXME. */
11174 retval
= (unsigned) (count
* stmt_cost
);
11175 cost
[where
] += retval
;
11181 /* Return true if and only if this insn can dual-issue only as older. */
11183 cortexa7_older_only (rtx_insn
*insn
)
11185 if (recog_memoized (insn
) < 0)
11188 switch (get_attr_type (insn
))
11190 case TYPE_ALU_DSP_REG
:
11191 case TYPE_ALU_SREG
:
11192 case TYPE_ALUS_SREG
:
11193 case TYPE_LOGIC_REG
:
11194 case TYPE_LOGICS_REG
:
11196 case TYPE_ADCS_REG
:
11201 case TYPE_SHIFT_IMM
:
11202 case TYPE_SHIFT_REG
:
11203 case TYPE_LOAD_BYTE
:
11206 case TYPE_FFARITHS
:
11208 case TYPE_FFARITHD
:
11226 case TYPE_F_STORES
:
11233 /* Return true if and only if this insn can dual-issue as younger. */
11235 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11237 if (recog_memoized (insn
) < 0)
11240 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11244 switch (get_attr_type (insn
))
11247 case TYPE_ALUS_IMM
:
11248 case TYPE_LOGIC_IMM
:
11249 case TYPE_LOGICS_IMM
:
11254 case TYPE_MOV_SHIFT
:
11255 case TYPE_MOV_SHIFT_REG
:
11265 /* Look for an instruction that can dual issue only as an older
11266 instruction, and move it in front of any instructions that can
11267 dual-issue as younger, while preserving the relative order of all
11268 other instructions in the ready list. This is a hueuristic to help
11269 dual-issue in later cycles, by postponing issue of more flexible
11270 instructions. This heuristic may affect dual issue opportunities
11271 in the current cycle. */
11273 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11274 int *n_readyp
, int clock
)
11277 int first_older_only
= -1, first_younger
= -1;
11281 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11285 /* Traverse the ready list from the head (the instruction to issue
11286 first), and looking for the first instruction that can issue as
11287 younger and the first instruction that can dual-issue only as
11289 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11291 rtx_insn
*insn
= ready
[i
];
11292 if (cortexa7_older_only (insn
))
11294 first_older_only
= i
;
11296 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11299 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11303 /* Nothing to reorder because either no younger insn found or insn
11304 that can dual-issue only as older appears before any insn that
11305 can dual-issue as younger. */
11306 if (first_younger
== -1)
11309 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11313 /* Nothing to reorder because no older-only insn in the ready list. */
11314 if (first_older_only
== -1)
11317 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11321 /* Move first_older_only insn before first_younger. */
11323 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11324 INSN_UID(ready
[first_older_only
]),
11325 INSN_UID(ready
[first_younger
]));
11326 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11327 for (i
= first_older_only
; i
< first_younger
; i
++)
11329 ready
[i
] = ready
[i
+1];
11332 ready
[i
] = first_older_only_insn
;
11336 /* Implement TARGET_SCHED_REORDER. */
11338 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11343 case TARGET_CPU_cortexa7
:
11344 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11347 /* Do nothing for other cores. */
11351 return arm_issue_rate ();
11354 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11355 It corrects the value of COST based on the relationship between
11356 INSN and DEP through the dependence LINK. It returns the new
11357 value. There is a per-core adjust_cost hook to adjust scheduler costs
11358 and the per-core hook can choose to completely override the generic
11359 adjust_cost function. Only put bits of code into arm_adjust_cost that
11360 are common across all cores. */
11362 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
11367 /* When generating Thumb-1 code, we want to place flag-setting operations
11368 close to a conditional branch which depends on them, so that we can
11369 omit the comparison. */
11372 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11373 && recog_memoized (dep
) >= 0
11374 && get_attr_conds (dep
) == CONDS_SET
)
11377 if (current_tune
->sched_adjust_cost
!= NULL
)
11379 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
11383 /* XXX Is this strictly true? */
11384 if (dep_type
== REG_DEP_ANTI
11385 || dep_type
== REG_DEP_OUTPUT
)
11388 /* Call insns don't incur a stall, even if they follow a load. */
11393 if ((i_pat
= single_set (insn
)) != NULL
11394 && MEM_P (SET_SRC (i_pat
))
11395 && (d_pat
= single_set (dep
)) != NULL
11396 && MEM_P (SET_DEST (d_pat
)))
11398 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11399 /* This is a load after a store, there is no conflict if the load reads
11400 from a cached area. Assume that loads from the stack, and from the
11401 constant pool are cached, and that others will miss. This is a
11404 if ((GET_CODE (src_mem
) == SYMBOL_REF
11405 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11406 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11407 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11408 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11416 arm_max_conditional_execute (void)
11418 return max_insns_skipped
;
11422 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11425 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11427 return (optimize
> 0) ? 2 : 0;
11431 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11433 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11436 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11437 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11438 sequences of non-executed instructions in IT blocks probably take the same
11439 amount of time as executed instructions (and the IT instruction itself takes
11440 space in icache). This function was experimentally determined to give good
11441 results on a popular embedded benchmark. */
11444 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11446 return (TARGET_32BIT
&& speed_p
) ? 1
11447 : arm_default_branch_cost (speed_p
, predictable_p
);
11451 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
11453 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11456 static bool fp_consts_inited
= false;
11458 static REAL_VALUE_TYPE value_fp0
;
11461 init_fp_table (void)
11465 r
= REAL_VALUE_ATOF ("0", DFmode
);
11467 fp_consts_inited
= true;
11470 /* Return TRUE if rtx X is a valid immediate FP constant. */
11472 arm_const_double_rtx (rtx x
)
11474 const REAL_VALUE_TYPE
*r
;
11476 if (!fp_consts_inited
)
11479 r
= CONST_DOUBLE_REAL_VALUE (x
);
11480 if (REAL_VALUE_MINUS_ZERO (*r
))
11483 if (real_equal (r
, &value_fp0
))
11489 /* VFPv3 has a fairly wide range of representable immediates, formed from
11490 "quarter-precision" floating-point values. These can be evaluated using this
11491 formula (with ^ for exponentiation):
11495 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11496 16 <= n <= 31 and 0 <= r <= 7.
11498 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11500 - A (most-significant) is the sign bit.
11501 - BCD are the exponent (encoded as r XOR 3).
11502 - EFGH are the mantissa (encoded as n - 16).
11505 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11506 fconst[sd] instruction, or -1 if X isn't suitable. */
11508 vfp3_const_double_index (rtx x
)
11510 REAL_VALUE_TYPE r
, m
;
11511 int sign
, exponent
;
11512 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11513 unsigned HOST_WIDE_INT mask
;
11514 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11517 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11520 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11522 /* We can't represent these things, so detect them first. */
11523 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11526 /* Extract sign, exponent and mantissa. */
11527 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11528 r
= real_value_abs (&r
);
11529 exponent
= REAL_EXP (&r
);
11530 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11531 highest (sign) bit, with a fixed binary point at bit point_pos.
11532 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11533 bits for the mantissa, this may fail (low bits would be lost). */
11534 real_ldexp (&m
, &r
, point_pos
- exponent
);
11535 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11536 mantissa
= w
.elt (0);
11537 mant_hi
= w
.elt (1);
11539 /* If there are bits set in the low part of the mantissa, we can't
11540 represent this value. */
11544 /* Now make it so that mantissa contains the most-significant bits, and move
11545 the point_pos to indicate that the least-significant bits have been
11547 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11548 mantissa
= mant_hi
;
11550 /* We can permit four significant bits of mantissa only, plus a high bit
11551 which is always 1. */
11552 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11553 if ((mantissa
& mask
) != 0)
11556 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11557 mantissa
>>= point_pos
- 5;
11559 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11560 floating-point immediate zero with Neon using an integer-zero load, but
11561 that case is handled elsewhere.) */
11565 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11567 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11568 normalized significands are in the range [1, 2). (Our mantissa is shifted
11569 left 4 places at this point relative to normalized IEEE754 values). GCC
11570 internally uses [0.5, 1) (see real.c), so the exponent returned from
11571 REAL_EXP must be altered. */
11572 exponent
= 5 - exponent
;
11574 if (exponent
< 0 || exponent
> 7)
11577 /* Sign, mantissa and exponent are now in the correct form to plug into the
11578 formula described in the comment above. */
11579 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11582 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11584 vfp3_const_double_rtx (rtx x
)
11589 return vfp3_const_double_index (x
) != -1;
11592 /* Recognize immediates which can be used in various Neon instructions. Legal
11593 immediates are described by the following table (for VMVN variants, the
11594 bitwise inverse of the constant shown is recognized. In either case, VMOV
11595 is output and the correct instruction to use for a given constant is chosen
11596 by the assembler). The constant shown is replicated across all elements of
11597 the destination vector.
11599 insn elems variant constant (binary)
11600 ---- ----- ------- -----------------
11601 vmov i32 0 00000000 00000000 00000000 abcdefgh
11602 vmov i32 1 00000000 00000000 abcdefgh 00000000
11603 vmov i32 2 00000000 abcdefgh 00000000 00000000
11604 vmov i32 3 abcdefgh 00000000 00000000 00000000
11605 vmov i16 4 00000000 abcdefgh
11606 vmov i16 5 abcdefgh 00000000
11607 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11608 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11609 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11610 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11611 vmvn i16 10 00000000 abcdefgh
11612 vmvn i16 11 abcdefgh 00000000
11613 vmov i32 12 00000000 00000000 abcdefgh 11111111
11614 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11615 vmov i32 14 00000000 abcdefgh 11111111 11111111
11616 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11617 vmov i8 16 abcdefgh
11618 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11619 eeeeeeee ffffffff gggggggg hhhhhhhh
11620 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11621 vmov f32 19 00000000 00000000 00000000 00000000
11623 For case 18, B = !b. Representable values are exactly those accepted by
11624 vfp3_const_double_index, but are output as floating-point numbers rather
11627 For case 19, we will change it to vmov.i32 when assembling.
11629 Variants 0-5 (inclusive) may also be used as immediates for the second
11630 operand of VORR/VBIC instructions.
11632 The INVERSE argument causes the bitwise inverse of the given operand to be
11633 recognized instead (used for recognizing legal immediates for the VAND/VORN
11634 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11635 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11636 output, rather than the real insns vbic/vorr).
11638 INVERSE makes no difference to the recognition of float vectors.
11640 The return value is the variant of immediate as shown in the above table, or
11641 -1 if the given value doesn't match any of the listed patterns.
11644 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11645 rtx
*modconst
, int *elementwidth
)
11647 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11649 for (i = 0; i < idx; i += (STRIDE)) \
11654 immtype = (CLASS); \
11655 elsize = (ELSIZE); \
11659 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11660 unsigned int innersize
;
11661 unsigned char bytes
[16];
11662 int immtype
= -1, matches
;
11663 unsigned int invmask
= inverse
? 0xff : 0;
11664 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11667 n_elts
= CONST_VECTOR_NUNITS (op
);
11671 if (mode
== VOIDmode
)
11675 innersize
= GET_MODE_UNIT_SIZE (mode
);
11677 /* Vectors of float constants. */
11678 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11680 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11682 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11685 /* FP16 vectors cannot be represented. */
11686 if (GET_MODE_INNER (mode
) == HFmode
)
11689 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11690 are distinct in this context. */
11691 if (!const_vec_duplicate_p (op
))
11695 *modconst
= CONST_VECTOR_ELT (op
, 0);
11700 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11706 /* The tricks done in the code below apply for little-endian vector layout.
11707 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11708 FIXME: Implement logic for big-endian vectors. */
11709 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
11712 /* Splat vector constant out into a byte vector. */
11713 for (i
= 0; i
< n_elts
; i
++)
11715 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11716 unsigned HOST_WIDE_INT elpart
;
11718 gcc_assert (CONST_INT_P (el
));
11719 elpart
= INTVAL (el
);
11721 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11723 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11724 elpart
>>= BITS_PER_UNIT
;
11728 /* Sanity check. */
11729 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11733 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11734 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11736 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11737 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11739 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11740 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11742 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11743 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11745 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11747 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11749 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11750 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11752 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11753 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11755 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11756 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11758 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11759 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11761 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11763 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11765 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11766 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11768 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11769 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11771 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11772 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11774 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11775 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11777 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11779 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11780 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11788 *elementwidth
= elsize
;
11792 unsigned HOST_WIDE_INT imm
= 0;
11794 /* Un-invert bytes of recognized vector, if necessary. */
11796 for (i
= 0; i
< idx
; i
++)
11797 bytes
[i
] ^= invmask
;
11801 /* FIXME: Broken on 32-bit H_W_I hosts. */
11802 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11804 for (i
= 0; i
< 8; i
++)
11805 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11806 << (i
* BITS_PER_UNIT
);
11808 *modconst
= GEN_INT (imm
);
11812 unsigned HOST_WIDE_INT imm
= 0;
11814 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11815 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11817 *modconst
= GEN_INT (imm
);
11825 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11826 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11827 float elements), and a modified constant (whatever should be output for a
11828 VMOV) in *MODCONST. */
11831 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
11832 rtx
*modconst
, int *elementwidth
)
11836 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
11842 *modconst
= tmpconst
;
11845 *elementwidth
= tmpwidth
;
11850 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11851 the immediate is valid, write a constant suitable for using as an operand
11852 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11853 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11856 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
11857 rtx
*modconst
, int *elementwidth
)
11861 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
11863 if (retval
< 0 || retval
> 5)
11867 *modconst
= tmpconst
;
11870 *elementwidth
= tmpwidth
;
11875 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11876 the immediate is valid, write a constant suitable for using as an operand
11877 to VSHR/VSHL to *MODCONST and the corresponding element width to
11879 *ELEMENTWIDTH. ISLEFTSHIFT determines whether it is a left or a right shift,
11879 because they have different limitations. */
11882 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
11883 rtx
*modconst
, int *elementwidth
,
11886 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11887 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
11888 unsigned HOST_WIDE_INT last_elt
= 0;
11889 unsigned HOST_WIDE_INT maxshift
;
11891 /* Split vector constant out into a byte vector. */
11892 for (i
= 0; i
< n_elts
; i
++)
11894 rtx el
= CONST_VECTOR_ELT (op
, i
);
11895 unsigned HOST_WIDE_INT elpart
;
11897 if (CONST_INT_P (el
))
11898 elpart
= INTVAL (el
);
11899 else if (CONST_DOUBLE_P (el
))
11902 gcc_unreachable ();
11904 if (i
!= 0 && elpart
!= last_elt
)
11910 /* Shift less than element size. */
11911 maxshift
= innersize
* 8;
11915 /* Left shift immediate value can be from 0 to <size>-1. */
11916 if (last_elt
>= maxshift
)
11921 /* Right shift immediate value can be from 1 to <size>. */
11922 if (last_elt
== 0 || last_elt
> maxshift
)
11927 *elementwidth
= innersize
* 8;
11930 *modconst
= CONST_VECTOR_ELT (op
, 0);
11935 /* Return a string suitable for output of Neon immediate logic operation
11939 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
11940 int inverse
, int quad
)
11942 int width
, is_valid
;
11943 static char templ
[40];
11945 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
11947 gcc_assert (is_valid
!= 0);
11950 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
11952 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
11957 /* Return a string suitable for output of Neon immediate shift operation
11958 (VSHR or VSHL) MNEM. */
11961 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
11962 machine_mode mode
, int quad
,
11965 int width
, is_valid
;
11966 static char templ
[40];
11968 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
11969 gcc_assert (is_valid
!= 0);
11972 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
11974 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
11979 /* Output a sequence of pairwise operations to implement a reduction.
11980 NOTE: We do "too much work" here, because pairwise operations work on two
11981 registers-worth of operands in one go. Unfortunately we can't exploit those
11982 extra calculations to do the full operation in fewer steps, I don't think.
11983 Although all vector elements of the result but the first are ignored, we
11984 actually calculate the same result in each of the elements. An alternative
11985 such as initially loading a vector with zero to use as each of the second
11986 operands would use up an additional register and take an extra instruction,
11987 for no particular gain. */
11990 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
11991 rtx (*reduc
) (rtx
, rtx
, rtx
))
11993 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
11996 for (i
= parts
/ 2; i
>= 1; i
/= 2)
11998 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
11999 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12004 /* If VALS is a vector constant that can be loaded into a register
12005 using VDUP, generate instructions to do so and return an RTX to
12006 assign to the register. Otherwise return NULL_RTX. */
12009 neon_vdup_constant (rtx vals
)
12011 machine_mode mode
= GET_MODE (vals
);
12012 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12015 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12018 if (!const_vec_duplicate_p (vals
, &x
))
12019 /* The elements are not all the same. We could handle repeating
12020 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12021 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12025 /* We can load this constant by using VDUP and a constant in a
12026 single ARM register. This will be cheaper than a vector
12029 x
= copy_to_mode_reg (inner_mode
, x
);
12030 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12033 /* Generate code to load VALS, which is a PARALLEL containing only
12034 constants (for vec_init) or CONST_VECTOR, efficiently into a
12035 register. Returns an RTX to copy into the register, or NULL_RTX
12036 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12039 neon_make_constant (rtx vals
)
12041 machine_mode mode
= GET_MODE (vals
);
12043 rtx const_vec
= NULL_RTX
;
12044 int n_elts
= GET_MODE_NUNITS (mode
);
12048 if (GET_CODE (vals
) == CONST_VECTOR
)
12050 else if (GET_CODE (vals
) == PARALLEL
)
12052 /* A CONST_VECTOR must contain only CONST_INTs and
12053 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12054 Only store valid constants in a CONST_VECTOR. */
12055 for (i
= 0; i
< n_elts
; ++i
)
12057 rtx x
= XVECEXP (vals
, 0, i
);
12058 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12061 if (n_const
== n_elts
)
12062 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12065 gcc_unreachable ();
12067 if (const_vec
!= NULL
12068 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12069 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12071 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12072 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12073 pipeline cycle; creating the constant takes one or two ARM
12074 pipeline cycles. */
12076 else if (const_vec
!= NULL_RTX
)
12077 /* Load from constant pool. On Cortex-A8 this takes two cycles
12078 (for either double or quad vectors). We cannot take advantage
12079 of single-cycle VLD1 because we need a PC-relative addressing
12083 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12084 We cannot construct an initializer. */
12088 /* Initialize vector TARGET to VALS. */
12091 neon_expand_vector_init (rtx target
, rtx vals
)
12093 machine_mode mode
= GET_MODE (target
);
12094 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12095 int n_elts
= GET_MODE_NUNITS (mode
);
12096 int n_var
= 0, one_var
= -1;
12097 bool all_same
= true;
12101 for (i
= 0; i
< n_elts
; ++i
)
12103 x
= XVECEXP (vals
, 0, i
);
12104 if (!CONSTANT_P (x
))
12105 ++n_var
, one_var
= i
;
12107 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12113 rtx constant
= neon_make_constant (vals
);
12114 if (constant
!= NULL_RTX
)
12116 emit_move_insn (target
, constant
);
12121 /* Splat a single non-constant element if we can. */
12122 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12124 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12125 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
12129 /* One field is non-constant. Load constant then overwrite varying
12130 field. This is more efficient than using the stack. */
12133 rtx copy
= copy_rtx (vals
);
12134 rtx index
= GEN_INT (one_var
);
12136 /* Load constant part of vector, substitute neighboring value for
12137 varying element. */
12138 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12139 neon_expand_vector_init (target
, copy
);
12141 /* Insert variable. */
12142 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12146 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12149 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12152 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12155 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12158 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12161 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12164 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12167 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12170 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12173 gcc_unreachable ();
12178 /* Construct the vector in memory one field at a time
12179 and load the whole vector. */
12180 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12181 for (i
= 0; i
< n_elts
; i
++)
12182 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12183 i
* GET_MODE_SIZE (inner_mode
)),
12184 XVECEXP (vals
, 0, i
));
12185 emit_move_insn (target
, mem
);
12188 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12189 ERR if it doesn't. EXP indicates the source location, which includes the
12190 inlining history for intrinsics. */
12193 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12194 const_tree exp
, const char *desc
)
12196 HOST_WIDE_INT lane
;
12198 gcc_assert (CONST_INT_P (operand
));
12200 lane
= INTVAL (operand
);
12202 if (lane
< low
|| lane
>= high
)
12205 error ("%K%s %wd out of range %wd - %wd",
12206 exp
, desc
, lane
, low
, high
- 1);
12208 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12212 /* Bounds-check lanes. */
12215 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12218 bounds_check (operand
, low
, high
, exp
, "lane");
12221 /* Bounds-check constants. */
12224 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12226 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12230 neon_element_bits (machine_mode mode
)
12232 return GET_MODE_UNIT_BITSIZE (mode
);
12236 /* Predicates for `match_operand' and `match_operator'. */
12238 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12239 WB is true if full writeback address modes are allowed and is false
12240 if limited writeback address modes (POST_INC and PRE_DEC) are
12244 arm_coproc_mem_operand (rtx op
, bool wb
)
12248 /* Reject eliminable registers. */
12249 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12250 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12251 || reg_mentioned_p (arg_pointer_rtx
, op
)
12252 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12253 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12254 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12255 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12258 /* Constants are converted into offsets from labels. */
12262 ind
= XEXP (op
, 0);
12264 if (reload_completed
12265 && (GET_CODE (ind
) == LABEL_REF
12266 || (GET_CODE (ind
) == CONST
12267 && GET_CODE (XEXP (ind
, 0)) == PLUS
12268 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12269 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12272 /* Match: (mem (reg)). */
12274 return arm_address_register_rtx_p (ind
, 0);
12276 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12277 acceptable in any case (subject to verification by
12278 arm_address_register_rtx_p). We need WB to be true to accept
12279 PRE_INC and POST_DEC. */
12280 if (GET_CODE (ind
) == POST_INC
12281 || GET_CODE (ind
) == PRE_DEC
12283 && (GET_CODE (ind
) == PRE_INC
12284 || GET_CODE (ind
) == POST_DEC
)))
12285 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12288 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12289 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12290 && GET_CODE (XEXP (ind
, 1)) == PLUS
12291 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12292 ind
= XEXP (ind
, 1);
12297 if (GET_CODE (ind
) == PLUS
12298 && REG_P (XEXP (ind
, 0))
12299 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12300 && CONST_INT_P (XEXP (ind
, 1))
12301 && INTVAL (XEXP (ind
, 1)) > -1024
12302 && INTVAL (XEXP (ind
, 1)) < 1024
12303 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12309 /* Return TRUE if OP is a memory operand which we can load or store a vector
12310 to/from. TYPE is one of the following values:
12311 0 - Vector load/store (vldr)
12312 1 - Core registers (ldm)
12313 2 - Element/structure loads (vld1)
12316 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12320 /* Reject eliminable registers. */
12321 if (strict
&& ! (reload_in_progress
|| reload_completed
)
12322 && (reg_mentioned_p (frame_pointer_rtx
, op
)
12323 || reg_mentioned_p (arg_pointer_rtx
, op
)
12324 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12325 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12326 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12327 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12330 /* Constants are converted into offsets from labels. */
12334 ind
= XEXP (op
, 0);
12336 if (reload_completed
12337 && (GET_CODE (ind
) == LABEL_REF
12338 || (GET_CODE (ind
) == CONST
12339 && GET_CODE (XEXP (ind
, 0)) == PLUS
12340 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12341 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12344 /* Match: (mem (reg)). */
12346 return arm_address_register_rtx_p (ind
, 0);
12348 /* Allow post-increment with Neon registers. */
12349 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12350 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12351 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12353 /* Allow post-increment by register for VLDn */
12354 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12355 && GET_CODE (XEXP (ind
, 1)) == PLUS
12356 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12363 && GET_CODE (ind
) == PLUS
12364 && REG_P (XEXP (ind
, 0))
12365 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12366 && CONST_INT_P (XEXP (ind
, 1))
12367 && INTVAL (XEXP (ind
, 1)) > -1024
12368 /* For quad modes, we restrict the constant offset to be slightly less
12369 than what the instruction format permits. We have no such constraint
12370 on double mode offsets. (This must match arm_legitimate_index_p.) */
12371 && (INTVAL (XEXP (ind
, 1))
12372 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12373 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12379 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12382 neon_struct_mem_operand (rtx op
)
12386 /* Reject eliminable registers. */
12387 if (! (reload_in_progress
|| reload_completed
)
12388 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12389 || reg_mentioned_p (arg_pointer_rtx
, op
)
12390 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12391 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12392 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12393 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12396 /* Constants are converted into offsets from labels. */
12400 ind
= XEXP (op
, 0);
12402 if (reload_completed
12403 && (GET_CODE (ind
) == LABEL_REF
12404 || (GET_CODE (ind
) == CONST
12405 && GET_CODE (XEXP (ind
, 0)) == PLUS
12406 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12407 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12410 /* Match: (mem (reg)). */
12412 return arm_address_register_rtx_p (ind
, 0);
12414 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12415 if (GET_CODE (ind
) == POST_INC
12416 || GET_CODE (ind
) == PRE_DEC
)
12417 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12422 /* Return true if X is a register that will be eliminated later on. */
12424 arm_eliminable_register (rtx x
)
12426 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12427 || REGNO (x
) == ARG_POINTER_REGNUM
12428 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12429 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12432 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12433 coprocessor registers. Otherwise return NO_REGS. */
12436 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12438 if (mode
== HFmode
)
12440 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12441 return GENERAL_REGS
;
12442 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12444 return GENERAL_REGS
;
12447 /* The neon move patterns handle all legitimate vector and struct
12450 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12451 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12452 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12453 || VALID_NEON_STRUCT_MODE (mode
)))
12456 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12459 return GENERAL_REGS
;
12462 /* Values which must be returned in the most-significant end of the return
12466 arm_return_in_msb (const_tree valtype
)
12468 return (TARGET_AAPCS_BASED
12469 && BYTES_BIG_ENDIAN
12470 && (AGGREGATE_TYPE_P (valtype
)
12471 || TREE_CODE (valtype
) == COMPLEX_TYPE
12472 || FIXED_POINT_TYPE_P (valtype
)));
12475 /* Return TRUE if X references a SYMBOL_REF. */
12477 symbol_mentioned_p (rtx x
)
12482 if (GET_CODE (x
) == SYMBOL_REF
)
12485 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12486 are constant offsets, not symbols. */
12487 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12490 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12492 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12498 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12499 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12502 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12509 /* Return TRUE if X references a LABEL_REF. */
12511 label_mentioned_p (rtx x
)
12516 if (GET_CODE (x
) == LABEL_REF
)
12519 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12520 instruction, but they are constant offsets, not symbols. */
12521 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12524 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12525 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12531 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12532 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12535 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12543 tls_mentioned_p (rtx x
)
12545 switch (GET_CODE (x
))
12548 return tls_mentioned_p (XEXP (x
, 0));
12551 if (XINT (x
, 1) == UNSPEC_TLS
)
12554 /* Fall through. */
12560 /* Must not copy any rtx that uses a pc-relative address.
12561 Also, disallow copying of load-exclusive instructions that
12562 may appear after splitting of compare-and-swap-style operations
12563 so as to prevent those loops from being transformed away from their
12564 canonical forms (see PR 69904). */
12567 arm_cannot_copy_insn_p (rtx_insn
*insn
)
12569 /* The tls call insn cannot be copied, as it is paired with a data
12571 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12574 subrtx_iterator::array_type array
;
12575 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
12577 const_rtx x
= *iter
;
12578 if (GET_CODE (x
) == UNSPEC
12579 && (XINT (x
, 1) == UNSPEC_PIC_BASE
12580 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
12584 rtx set
= single_set (insn
);
12587 rtx src
= SET_SRC (set
);
12588 if (GET_CODE (src
) == ZERO_EXTEND
)
12589 src
= XEXP (src
, 0);
12591 /* Catch the load-exclusive and load-acquire operations. */
12592 if (GET_CODE (src
) == UNSPEC_VOLATILE
12593 && (XINT (src
, 1) == VUNSPEC_LL
12594 || XINT (src
, 1) == VUNSPEC_LAX
))
12601 minmax_code (rtx x
)
12603 enum rtx_code code
= GET_CODE (x
);
12616 gcc_unreachable ();
12620 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12623 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12624 int *mask
, bool *signed_sat
)
12626 /* The high bound must be a power of two minus one. */
12627 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12631 /* The low bound is either zero (for usat) or one less than the
12632 negation of the high bound (for ssat). */
12633 if (INTVAL (lo_bound
) == 0)
12638 *signed_sat
= false;
12643 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12648 *signed_sat
= true;
12656 /* Return 1 if memory locations are adjacent. */
12658 adjacent_mem_locations (rtx a
, rtx b
)
12660 /* We don't guarantee to preserve the order of these memory refs. */
12661 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12664 if ((REG_P (XEXP (a
, 0))
12665 || (GET_CODE (XEXP (a
, 0)) == PLUS
12666 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12667 && (REG_P (XEXP (b
, 0))
12668 || (GET_CODE (XEXP (b
, 0)) == PLUS
12669 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12671 HOST_WIDE_INT val0
= 0, val1
= 0;
12675 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12677 reg0
= XEXP (XEXP (a
, 0), 0);
12678 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12681 reg0
= XEXP (a
, 0);
12683 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12685 reg1
= XEXP (XEXP (b
, 0), 0);
12686 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12689 reg1
= XEXP (b
, 0);
12691 /* Don't accept any offset that will require multiple
12692 instructions to handle, since this would cause the
12693 arith_adjacentmem pattern to output an overlong sequence. */
12694 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12697 /* Don't allow an eliminable register: register elimination can make
12698 the offset too large. */
12699 if (arm_eliminable_register (reg0
))
12702 val_diff
= val1
- val0
;
12706 /* If the target has load delay slots, then there's no benefit
12707 to using an ldm instruction unless the offset is zero and
12708 we are optimizing for size. */
12709 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12710 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12711 && (val_diff
== 4 || val_diff
== -4));
12714 return ((REGNO (reg0
) == REGNO (reg1
))
12715 && (val_diff
== 4 || val_diff
== -4));
12721 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12722 for load operations, false for store operations. CONSECUTIVE is true
12723 if the register numbers in the operation must be consecutive in the register
12724 bank. RETURN_PC is true if value is to be loaded in PC.
12725 The pattern we are trying to match for load is:
12726 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12727 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12730 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12733 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12734 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12735 3. If consecutive is TRUE, then for kth register being loaded,
12736 REGNO (R_dk) = REGNO (R_d0) + k.
12737 The pattern for store is similar. */
12739 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12740 bool consecutive
, bool return_pc
)
12742 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12743 rtx reg
, mem
, addr
;
12745 unsigned first_regno
;
12746 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12748 bool addr_reg_in_reglist
= false;
12749 bool update
= false;
12754 /* If not in SImode, then registers must be consecutive
12755 (e.g., VLDM instructions for DFmode). */
12756 gcc_assert ((mode
== SImode
) || consecutive
);
12757 /* Setting return_pc for stores is illegal. */
12758 gcc_assert (!return_pc
|| load
);
12760 /* Set up the increments and the regs per val based on the mode. */
12761 reg_increment
= GET_MODE_SIZE (mode
);
12762 regs_per_val
= reg_increment
/ 4;
12763 offset_adj
= return_pc
? 1 : 0;
12766 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12767 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12770 /* Check if this is a write-back. */
12771 elt
= XVECEXP (op
, 0, offset_adj
);
12772 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12778 /* The offset adjustment must be the number of registers being
12779 popped times the size of a single register. */
12780 if (!REG_P (SET_DEST (elt
))
12781 || !REG_P (XEXP (SET_SRC (elt
), 0))
12782 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12783 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12784 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12785 ((count
- 1 - offset_adj
) * reg_increment
))
12789 i
= i
+ offset_adj
;
12790 base
= base
+ offset_adj
;
12791 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12792 success depends on the type: VLDM can do just one reg,
12793 LDM must do at least two. */
12794 if ((count
<= i
) && (mode
== SImode
))
12797 elt
= XVECEXP (op
, 0, i
- 1);
12798 if (GET_CODE (elt
) != SET
)
12803 reg
= SET_DEST (elt
);
12804 mem
= SET_SRC (elt
);
12808 reg
= SET_SRC (elt
);
12809 mem
= SET_DEST (elt
);
12812 if (!REG_P (reg
) || !MEM_P (mem
))
12815 regno
= REGNO (reg
);
12816 first_regno
= regno
;
12817 addr
= XEXP (mem
, 0);
12818 if (GET_CODE (addr
) == PLUS
)
12820 if (!CONST_INT_P (XEXP (addr
, 1)))
12823 offset
= INTVAL (XEXP (addr
, 1));
12824 addr
= XEXP (addr
, 0);
12830 /* Don't allow SP to be loaded unless it is also the base register. It
12831 guarantees that SP is reset correctly when an LDM instruction
12832 is interrupted. Otherwise, we might end up with a corrupt stack. */
12833 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12836 for (; i
< count
; i
++)
12838 elt
= XVECEXP (op
, 0, i
);
12839 if (GET_CODE (elt
) != SET
)
12844 reg
= SET_DEST (elt
);
12845 mem
= SET_SRC (elt
);
12849 reg
= SET_SRC (elt
);
12850 mem
= SET_DEST (elt
);
12854 || GET_MODE (reg
) != mode
12855 || REGNO (reg
) <= regno
12858 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12859 /* Don't allow SP to be loaded unless it is also the base register. It
12860 guarantees that SP is reset correctly when an LDM instruction
12861 is interrupted. Otherwise, we might end up with a corrupt stack. */
12862 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12864 || GET_MODE (mem
) != mode
12865 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12866 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12867 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
12868 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
12869 offset
+ (i
- base
) * reg_increment
))
12870 && (!REG_P (XEXP (mem
, 0))
12871 || offset
+ (i
- base
) * reg_increment
!= 0)))
12874 regno
= REGNO (reg
);
12875 if (regno
== REGNO (addr
))
12876 addr_reg_in_reglist
= true;
12881 if (update
&& addr_reg_in_reglist
)
12884 /* For Thumb-1, address register is always modified - either by write-back
12885 or by explicit load. If the pattern does not describe an update,
12886 then the address register must be in the list of loaded registers. */
12888 return update
|| addr_reg_in_reglist
;
12894 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12895 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12896 instruction. ADD_OFFSET is nonzero if the base address register needs
12897 to be modified with an add instruction before we can use it. */
12900 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
12901 int nops
, HOST_WIDE_INT add_offset
)
12903 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12904 if the offset isn't small enough. The reason 2 ldrs are faster
12905 is because these ARMs are able to do more than one cache access
12906 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12907 whilst the ARM8 has a double bandwidth cache. This means that
12908 these cores can do both an instruction fetch and a data fetch in
12909 a single cycle, so the trick of calculating the address into a
12910 scratch register (one of the result regs) and then doing a load
12911 multiple actually becomes slower (and no smaller in code size).
12912 That is the transformation
12914 ldr rd1, [rbase + offset]
12915 ldr rd2, [rbase + offset + 4]
12919 add rd1, rbase, offset
12920 ldmia rd1, {rd1, rd2}
12922 produces worse code -- '3 cycles + any stalls on rd2' instead of
12923 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12924 access per cycle, the first sequence could never complete in less
12925 than 6 cycles, whereas the ldm sequence would only take 5 and
12926 would make better use of sequential accesses if not hitting the
12929 We cheat here and test 'arm_ld_sched' which we currently know to
12930 only be true for the ARM8, ARM9 and StrongARM. If this ever
12931 changes, then the test below needs to be reworked. */
12932 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
12935 /* XScale has load-store double instructions, but they have stricter
12936 alignment requirements than load-store multiple, so we cannot
12939 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12940 the pipeline until completion.
12948 An ldr instruction takes 1-3 cycles, but does not block the
12957 Best case ldr will always win. However, the more ldr instructions
12958 we issue, the less likely we are to be able to schedule them well.
12959 Using ldr instructions also increases code size.
12961 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12962 for counts of 3 or 4 regs. */
12963 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
12968 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12969 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12970 an array ORDER which describes the sequence to use when accessing the
12971 offsets that produces an ascending order. In this sequence, each
12972 offset must be larger by exactly 4 than the previous one. ORDER[0]
12973 must have been filled in with the lowest offset by the caller.
12974 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12975 we use to verify that ORDER produces an ascending order of registers.
12976 Return true if it was possible to construct such an order, false if
12980 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
12981 int *unsorted_regs
)
12984 for (i
= 1; i
< nops
; i
++)
12988 order
[i
] = order
[i
- 1];
12989 for (j
= 0; j
< nops
; j
++)
12990 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
12992 /* We must find exactly one offset that is higher than the
12993 previous one by 4. */
12994 if (order
[i
] != order
[i
- 1])
12998 if (order
[i
] == order
[i
- 1])
13000 /* The register numbers must be ascending. */
13001 if (unsorted_regs
!= NULL
13002 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13008 /* Used to determine in a peephole whether a sequence of load
13009 instructions can be changed into a load-multiple instruction.
13010 NOPS is the number of separate load instructions we are examining. The
13011 first NOPS entries in OPERANDS are the destination registers, the
13012 next NOPS entries are memory operands. If this function is
13013 successful, *BASE is set to the common base register of the memory
13014 accesses; *LOAD_OFFSET is set to the first memory location's offset
13015 from that base register.
13016 REGS is an array filled in with the destination register numbers.
13017 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13018 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13019 the sequence of registers in REGS matches the loads from ascending memory
13020 locations, and the function verifies that the register numbers are
13021 themselves ascending. If CHECK_REGS is false, the register numbers
13022 are stored in the order they are found in the operands. */
13024 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13025 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13027 int unsorted_regs
[MAX_LDM_STM_OPS
];
13028 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13029 int order
[MAX_LDM_STM_OPS
];
13030 rtx base_reg_rtx
= NULL
;
13034 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13035 easily extended if required. */
13036 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13038 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13040 /* Loop over the operands and check that the memory references are
13041 suitable (i.e. immediate offsets from the same base register). At
13042 the same time, extract the target register, and the memory
13044 for (i
= 0; i
< nops
; i
++)
13049 /* Convert a subreg of a mem into the mem itself. */
13050 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13051 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13053 gcc_assert (MEM_P (operands
[nops
+ i
]));
13055 /* Don't reorder volatile memory references; it doesn't seem worth
13056 looking for the case where the order is ok anyway. */
13057 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13060 offset
= const0_rtx
;
13062 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13063 || (GET_CODE (reg
) == SUBREG
13064 && REG_P (reg
= SUBREG_REG (reg
))))
13065 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13066 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13067 || (GET_CODE (reg
) == SUBREG
13068 && REG_P (reg
= SUBREG_REG (reg
))))
13069 && (CONST_INT_P (offset
13070 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13074 base_reg
= REGNO (reg
);
13075 base_reg_rtx
= reg
;
13076 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13079 else if (base_reg
!= (int) REGNO (reg
))
13080 /* Not addressed from the same base register. */
13083 unsorted_regs
[i
] = (REG_P (operands
[i
])
13084 ? REGNO (operands
[i
])
13085 : REGNO (SUBREG_REG (operands
[i
])));
13087 /* If it isn't an integer register, or if it overwrites the
13088 base register but isn't the last insn in the list, then
13089 we can't do this. */
13090 if (unsorted_regs
[i
] < 0
13091 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13092 || unsorted_regs
[i
] > 14
13093 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13096 /* Don't allow SP to be loaded unless it is also the base
13097 register. It guarantees that SP is reset correctly when
13098 an LDM instruction is interrupted. Otherwise, we might
13099 end up with a corrupt stack. */
13100 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13103 unsorted_offsets
[i
] = INTVAL (offset
);
13104 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13108 /* Not a suitable memory address. */
13112 /* All the useful information has now been extracted from the
13113 operands into unsorted_regs and unsorted_offsets; additionally,
13114 order[0] has been set to the lowest offset in the list. Sort
13115 the offsets into order, verifying that they are adjacent, and
13116 check that the register numbers are ascending. */
13117 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13118 check_regs
? unsorted_regs
: NULL
))
13122 memcpy (saved_order
, order
, sizeof order
);
13128 for (i
= 0; i
< nops
; i
++)
13129 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13131 *load_offset
= unsorted_offsets
[order
[0]];
13135 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13138 if (unsorted_offsets
[order
[0]] == 0)
13139 ldm_case
= 1; /* ldmia */
13140 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13141 ldm_case
= 2; /* ldmib */
13142 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13143 ldm_case
= 3; /* ldmda */
13144 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13145 ldm_case
= 4; /* ldmdb */
13146 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13147 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13152 if (!multiple_operation_profitable_p (false, nops
,
13154 ? unsorted_offsets
[order
[0]] : 0))
13160 /* Used to determine in a peephole whether a sequence of store instructions can
13161 be changed into a store-multiple instruction.
13162 NOPS is the number of separate store instructions we are examining.
13163 NOPS_TOTAL is the total number of instructions recognized by the peephole
13165 The first NOPS entries in OPERANDS are the source registers, the next
13166 NOPS entries are memory operands. If this function is successful, *BASE is
13167 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13168 to the first memory location's offset from that base register. REGS is an
13169 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13170 likewise filled with the corresponding rtx's.
13171 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13172 numbers to an ascending order of stores.
13173 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13174 from ascending memory locations, and the function verifies that the register
13175 numbers are themselves ascending. If CHECK_REGS is false, the register
13176 numbers are stored in the order they are found in the operands. */
13178 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13179 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13180 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13182 int unsorted_regs
[MAX_LDM_STM_OPS
];
13183 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13184 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13185 int order
[MAX_LDM_STM_OPS
];
13187 rtx base_reg_rtx
= NULL
;
13190 /* Write back of base register is currently only supported for Thumb 1. */
13191 int base_writeback
= TARGET_THUMB1
;
13193 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13194 easily extended if required. */
13195 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13197 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13199 /* Loop over the operands and check that the memory references are
13200 suitable (i.e. immediate offsets from the same base register). At
13201 the same time, extract the target register, and the memory
13203 for (i
= 0; i
< nops
; i
++)
13208 /* Convert a subreg of a mem into the mem itself. */
13209 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13210 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13212 gcc_assert (MEM_P (operands
[nops
+ i
]));
13214 /* Don't reorder volatile memory references; it doesn't seem worth
13215 looking for the case where the order is ok anyway. */
13216 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13219 offset
= const0_rtx
;
13221 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13222 || (GET_CODE (reg
) == SUBREG
13223 && REG_P (reg
= SUBREG_REG (reg
))))
13224 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13225 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13226 || (GET_CODE (reg
) == SUBREG
13227 && REG_P (reg
= SUBREG_REG (reg
))))
13228 && (CONST_INT_P (offset
13229 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13231 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13232 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13233 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13237 base_reg
= REGNO (reg
);
13238 base_reg_rtx
= reg
;
13239 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13242 else if (base_reg
!= (int) REGNO (reg
))
13243 /* Not addressed from the same base register. */
13246 /* If it isn't an integer register, then we can't do this. */
13247 if (unsorted_regs
[i
] < 0
13248 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13249 /* The effects are unpredictable if the base register is
13250 both updated and stored. */
13251 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13252 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13253 || unsorted_regs
[i
] > 14)
13256 unsorted_offsets
[i
] = INTVAL (offset
);
13257 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13261 /* Not a suitable memory address. */
13265 /* All the useful information has now been extracted from the
13266 operands into unsorted_regs and unsorted_offsets; additionally,
13267 order[0] has been set to the lowest offset in the list. Sort
13268 the offsets into order, verifying that they are adjacent, and
13269 check that the register numbers are ascending. */
13270 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13271 check_regs
? unsorted_regs
: NULL
))
13275 memcpy (saved_order
, order
, sizeof order
);
13281 for (i
= 0; i
< nops
; i
++)
13283 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13285 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13288 *load_offset
= unsorted_offsets
[order
[0]];
13292 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13295 if (unsorted_offsets
[order
[0]] == 0)
13296 stm_case
= 1; /* stmia */
13297 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13298 stm_case
= 2; /* stmib */
13299 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13300 stm_case
= 3; /* stmda */
13301 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13302 stm_case
= 4; /* stmdb */
13306 if (!multiple_operation_profitable_p (false, nops
, 0))
13312 /* Routines for use in generating RTL. */
13314 /* Generate a load-multiple instruction. COUNT is the number of loads in
13315 the instruction; REGS and MEMS are arrays containing the operands.
13316 BASEREG is the base register to be used in addressing the memory operands.
13317 WBACK_OFFSET is nonzero if the instruction should update the base
13321 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13322 HOST_WIDE_INT wback_offset
)
13327 if (!multiple_operation_profitable_p (false, count
, 0))
13333 for (i
= 0; i
< count
; i
++)
13334 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13336 if (wback_offset
!= 0)
13337 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13339 seq
= get_insns ();
13345 result
= gen_rtx_PARALLEL (VOIDmode
,
13346 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13347 if (wback_offset
!= 0)
13349 XVECEXP (result
, 0, 0)
13350 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13355 for (j
= 0; i
< count
; i
++, j
++)
13356 XVECEXP (result
, 0, i
)
13357 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13362 /* Generate a store-multiple instruction. COUNT is the number of stores in
13363 the instruction; REGS and MEMS are arrays containing the operands.
13364 BASEREG is the base register to be used in addressing the memory operands.
13365 WBACK_OFFSET is nonzero if the instruction should update the base
13369 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13370 HOST_WIDE_INT wback_offset
)
13375 if (GET_CODE (basereg
) == PLUS
)
13376 basereg
= XEXP (basereg
, 0);
13378 if (!multiple_operation_profitable_p (false, count
, 0))
13384 for (i
= 0; i
< count
; i
++)
13385 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13387 if (wback_offset
!= 0)
13388 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13390 seq
= get_insns ();
13396 result
= gen_rtx_PARALLEL (VOIDmode
,
13397 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13398 if (wback_offset
!= 0)
13400 XVECEXP (result
, 0, 0)
13401 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13406 for (j
= 0; i
< count
; i
++, j
++)
13407 XVECEXP (result
, 0, i
)
13408 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13413 /* Generate either a load-multiple or a store-multiple instruction. This
13414 function can be used in situations where we can start with a single MEM
13415 rtx and adjust its address upwards.
13416 COUNT is the number of operations in the instruction, not counting a
13417 possible update of the base register. REGS is an array containing the
13419 BASEREG is the base register to be used in addressing the memory operands,
13420 which are constructed from BASEMEM.
13421 WRITE_BACK specifies whether the generated instruction should include an
13422 update of the base register.
13423 OFFSETP is used to pass an offset to and from this function; this offset
13424 is not used when constructing the address (instead BASEMEM should have an
13425 appropriate offset in its address), it is used only for setting
13426 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13429 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13430 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13432 rtx mems
[MAX_LDM_STM_OPS
];
13433 HOST_WIDE_INT offset
= *offsetp
;
13436 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13438 if (GET_CODE (basereg
) == PLUS
)
13439 basereg
= XEXP (basereg
, 0);
13441 for (i
= 0; i
< count
; i
++)
13443 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13444 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13452 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13453 write_back
? 4 * count
: 0);
13455 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13456 write_back
? 4 * count
: 0);
13460 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13461 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13463 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13468 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13469 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13471 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13475 /* Called from a peephole2 expander to turn a sequence of loads into an
13476 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13477 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13478 is true if we can reorder the registers because they are used commutatively
13480 Returns true iff we could generate a new instruction. */
13483 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13485 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13486 rtx mems
[MAX_LDM_STM_OPS
];
13487 int i
, j
, base_reg
;
13489 HOST_WIDE_INT offset
;
13490 int write_back
= FALSE
;
13494 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13495 &base_reg
, &offset
, !sort_regs
);
13501 for (i
= 0; i
< nops
- 1; i
++)
13502 for (j
= i
+ 1; j
< nops
; j
++)
13503 if (regs
[i
] > regs
[j
])
13509 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13513 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13514 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13520 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13521 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13523 if (!TARGET_THUMB1
)
13525 base_reg
= regs
[0];
13526 base_reg_rtx
= newbase
;
13530 for (i
= 0; i
< nops
; i
++)
13532 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13533 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13536 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13537 write_back
? offset
+ i
* 4 : 0));
13541 /* Called from a peephole2 expander to turn a sequence of stores into an
13542 STM instruction. OPERANDS are the operands found by the peephole matcher;
13543 NOPS indicates how many separate stores we are trying to combine.
13544 Returns true iff we could generate a new instruction. */
13547 gen_stm_seq (rtx
*operands
, int nops
)
13550 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13551 rtx mems
[MAX_LDM_STM_OPS
];
13554 HOST_WIDE_INT offset
;
13555 int write_back
= FALSE
;
13558 bool base_reg_dies
;
13560 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13561 mem_order
, &base_reg
, &offset
, true);
13566 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13568 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13571 gcc_assert (base_reg_dies
);
13577 gcc_assert (base_reg_dies
);
13578 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13582 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13584 for (i
= 0; i
< nops
; i
++)
13586 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13587 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13590 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13591 write_back
? offset
+ i
* 4 : 0));
13595 /* Called from a peephole2 expander to turn a sequence of stores that are
13596 preceded by constant loads into an STM instruction. OPERANDS are the
13597 operands found by the peephole matcher; NOPS indicates how many
13598 separate stores we are trying to combine; there are 2 * NOPS
13599 instructions in the peephole.
13600 Returns true iff we could generate a new instruction. */
13603 gen_const_stm_seq (rtx
*operands
, int nops
)
13605 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13606 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13607 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13608 rtx mems
[MAX_LDM_STM_OPS
];
13611 HOST_WIDE_INT offset
;
13612 int write_back
= FALSE
;
13615 bool base_reg_dies
;
13617 HARD_REG_SET allocated
;
13619 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13620 mem_order
, &base_reg
, &offset
, false);
13625 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13627 /* If the same register is used more than once, try to find a free
13629 CLEAR_HARD_REG_SET (allocated
);
13630 for (i
= 0; i
< nops
; i
++)
13632 for (j
= i
+ 1; j
< nops
; j
++)
13633 if (regs
[i
] == regs
[j
])
13635 rtx t
= peep2_find_free_register (0, nops
* 2,
13636 TARGET_THUMB1
? "l" : "r",
13637 SImode
, &allocated
);
13641 regs
[i
] = REGNO (t
);
13645 /* Compute an ordering that maps the register numbers to an ascending
13648 for (i
= 0; i
< nops
; i
++)
13649 if (regs
[i
] < regs
[reg_order
[0]])
13652 for (i
= 1; i
< nops
; i
++)
13654 int this_order
= reg_order
[i
- 1];
13655 for (j
= 0; j
< nops
; j
++)
13656 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13657 && (this_order
== reg_order
[i
- 1]
13658 || regs
[j
] < regs
[this_order
]))
13660 reg_order
[i
] = this_order
;
13663 /* Ensure that registers that must be live after the instruction end
13664 up with the correct value. */
13665 for (i
= 0; i
< nops
; i
++)
13667 int this_order
= reg_order
[i
];
13668 if ((this_order
!= mem_order
[i
]
13669 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13670 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13674 /* Load the constants. */
13675 for (i
= 0; i
< nops
; i
++)
13677 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13678 sorted_regs
[i
] = regs
[reg_order
[i
]];
13679 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13682 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13684 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13687 gcc_assert (base_reg_dies
);
13693 gcc_assert (base_reg_dies
);
13694 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13698 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13700 for (i
= 0; i
< nops
; i
++)
13702 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13703 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13706 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13707 write_back
? offset
+ i
* 4 : 0));
13711 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13712 unaligned copies on processors which support unaligned semantics for those
13713 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13714 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13715 An interleave factor of 1 (the minimum) will perform no interleaving.
13716 Load/store multiple are used for aligned addresses where possible. */
13719 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13720 HOST_WIDE_INT length
,
13721 unsigned int interleave_factor
)
13723 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13724 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13725 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13726 HOST_WIDE_INT i
, j
;
13727 HOST_WIDE_INT remaining
= length
, words
;
13728 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13730 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13731 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13732 HOST_WIDE_INT srcoffset
, dstoffset
;
13733 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13736 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13738 /* Use hard registers if we have aligned source or destination so we can use
13739 load/store multiple with contiguous registers. */
13740 if (dst_aligned
|| src_aligned
)
13741 for (i
= 0; i
< interleave_factor
; i
++)
13742 regs
[i
] = gen_rtx_REG (SImode
, i
);
13744 for (i
= 0; i
< interleave_factor
; i
++)
13745 regs
[i
] = gen_reg_rtx (SImode
);
13747 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13748 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13750 srcoffset
= dstoffset
= 0;
13752 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13753 For copying the last bytes we want to subtract this offset again. */
13754 src_autoinc
= dst_autoinc
= 0;
13756 for (i
= 0; i
< interleave_factor
; i
++)
13759 /* Copy BLOCK_SIZE_BYTES chunks. */
13761 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13764 if (src_aligned
&& interleave_factor
> 1)
13766 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13767 TRUE
, srcbase
, &srcoffset
));
13768 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13772 for (j
= 0; j
< interleave_factor
; j
++)
13774 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13776 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13777 srcoffset
+ j
* UNITS_PER_WORD
);
13778 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13780 srcoffset
+= block_size_bytes
;
13784 if (dst_aligned
&& interleave_factor
> 1)
13786 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13787 TRUE
, dstbase
, &dstoffset
));
13788 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13792 for (j
= 0; j
< interleave_factor
; j
++)
13794 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13796 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13797 dstoffset
+ j
* UNITS_PER_WORD
);
13798 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13800 dstoffset
+= block_size_bytes
;
13803 remaining
-= block_size_bytes
;
13806 /* Copy any whole words left (note these aren't interleaved with any
13807 subsequent halfword/byte load/stores in the interests of simplicity). */
13809 words
= remaining
/ UNITS_PER_WORD
;
13811 gcc_assert (words
< interleave_factor
);
13813 if (src_aligned
&& words
> 1)
13815 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13817 src_autoinc
+= UNITS_PER_WORD
* words
;
13821 for (j
= 0; j
< words
; j
++)
13823 addr
= plus_constant (Pmode
, src
,
13824 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13825 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13826 srcoffset
+ j
* UNITS_PER_WORD
);
13828 emit_move_insn (regs
[j
], mem
);
13830 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13832 srcoffset
+= words
* UNITS_PER_WORD
;
13835 if (dst_aligned
&& words
> 1)
13837 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
13839 dst_autoinc
+= words
* UNITS_PER_WORD
;
13843 for (j
= 0; j
< words
; j
++)
13845 addr
= plus_constant (Pmode
, dst
,
13846 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
13847 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13848 dstoffset
+ j
* UNITS_PER_WORD
);
13850 emit_move_insn (mem
, regs
[j
]);
13852 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13854 dstoffset
+= words
* UNITS_PER_WORD
;
13857 remaining
-= words
* UNITS_PER_WORD
;
13859 gcc_assert (remaining
< 4);
13861 /* Copy a halfword if necessary. */
13863 if (remaining
>= 2)
13865 halfword_tmp
= gen_reg_rtx (SImode
);
13867 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13868 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
13869 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
13871 /* Either write out immediately, or delay until we've loaded the last
13872 byte, depending on interleave factor. */
13873 if (interleave_factor
== 1)
13875 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13876 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13877 emit_insn (gen_unaligned_storehi (mem
,
13878 gen_lowpart (HImode
, halfword_tmp
)));
13879 halfword_tmp
= NULL
;
13887 gcc_assert (remaining
< 2);
13889 /* Copy last byte. */
13891 if ((remaining
& 1) != 0)
13893 byte_tmp
= gen_reg_rtx (SImode
);
13895 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13896 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
13897 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
13899 if (interleave_factor
== 1)
13901 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13902 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13903 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13912 /* Store last halfword if we haven't done so already. */
13916 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13917 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13918 emit_insn (gen_unaligned_storehi (mem
,
13919 gen_lowpart (HImode
, halfword_tmp
)));
13923 /* Likewise for last byte. */
13927 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13928 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13929 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13933 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
13936 /* From mips_adjust_block_mem:
13938 Helper function for doing a loop-based block operation on memory
13939 reference MEM. Each iteration of the loop will operate on LENGTH
13942 Create a new base register for use within the loop and point it to
13943 the start of MEM. Create a new memory reference that uses this
13944 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13947 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
13950 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
13952 /* Although the new mem does not refer to a known location,
13953 it does keep up to LENGTH bytes of alignment. */
13954 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
13955 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
13958 /* From mips_block_move_loop:
13960 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13961 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13962 the memory regions do not overlap. */
13965 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
13966 unsigned int interleave_factor
,
13967 HOST_WIDE_INT bytes_per_iter
)
13969 rtx src_reg
, dest_reg
, final_src
, test
;
13970 HOST_WIDE_INT leftover
;
13972 leftover
= length
% bytes_per_iter
;
13973 length
-= leftover
;
13975 /* Create registers and memory references for use within the loop. */
13976 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
13977 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
13979 /* Calculate the value that SRC_REG should have after the last iteration of
13981 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
13982 0, 0, OPTAB_WIDEN
);
13984 /* Emit the start of the loop. */
13985 rtx_code_label
*label
= gen_label_rtx ();
13986 emit_label (label
);
13988 /* Emit the loop body. */
13989 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
13990 interleave_factor
);
13992 /* Move on to the next block. */
13993 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
13994 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
13996 /* Emit the loop condition. */
13997 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
13998 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14000 /* Mop up any left-over bytes. */
14002 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14005 /* Emit a block move when either the source or destination is unaligned (not
14006 aligned to a four-byte boundary). This may need further tuning depending on
14007 core type, optimize_size setting, etc. */
14010 arm_movmemqi_unaligned (rtx
*operands
)
14012 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14016 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14017 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14018 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14019 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14020 or dst_aligned though: allow more interleaving in those cases since the
14021 resulting code can be smaller. */
14022 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14023 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14026 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14027 interleave_factor
, bytes_per_iter
);
14029 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14030 interleave_factor
);
14034 /* Note that the loop created by arm_block_move_unaligned_loop may be
14035 subject to loop unrolling, which makes tuning this condition a little
14038 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14040 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14047 arm_gen_movmemqi (rtx
*operands
)
14049 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14050 HOST_WIDE_INT srcoffset
, dstoffset
;
14052 rtx src
, dst
, srcbase
, dstbase
;
14053 rtx part_bytes_reg
= NULL
;
14056 if (!CONST_INT_P (operands
[2])
14057 || !CONST_INT_P (operands
[3])
14058 || INTVAL (operands
[2]) > 64)
14061 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14062 return arm_movmemqi_unaligned (operands
);
14064 if (INTVAL (operands
[3]) & 3)
14067 dstbase
= operands
[0];
14068 srcbase
= operands
[1];
14070 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14071 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14073 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14074 out_words_to_go
= INTVAL (operands
[2]) / 4;
14075 last_bytes
= INTVAL (operands
[2]) & 3;
14076 dstoffset
= srcoffset
= 0;
14078 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14079 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14081 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14083 if (in_words_to_go
> 4)
14084 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14085 TRUE
, srcbase
, &srcoffset
));
14087 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14088 src
, FALSE
, srcbase
,
14091 if (out_words_to_go
)
14093 if (out_words_to_go
> 4)
14094 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14095 TRUE
, dstbase
, &dstoffset
));
14096 else if (out_words_to_go
!= 1)
14097 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14098 out_words_to_go
, dst
,
14101 dstbase
, &dstoffset
));
14104 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14105 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14106 if (last_bytes
!= 0)
14108 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14114 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14115 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14118 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14119 if (out_words_to_go
)
14123 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14124 sreg
= copy_to_reg (mem
);
14126 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14127 emit_move_insn (mem
, sreg
);
14130 gcc_assert (!in_words_to_go
); /* Sanity check */
14133 if (in_words_to_go
)
14135 gcc_assert (in_words_to_go
> 0);
14137 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14138 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14141 gcc_assert (!last_bytes
|| part_bytes_reg
);
14143 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14145 rtx tmp
= gen_reg_rtx (SImode
);
14147 /* The bytes we want are in the top end of the word. */
14148 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14149 GEN_INT (8 * (4 - last_bytes
))));
14150 part_bytes_reg
= tmp
;
14154 mem
= adjust_automodify_address (dstbase
, QImode
,
14155 plus_constant (Pmode
, dst
,
14157 dstoffset
+ last_bytes
- 1);
14158 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14162 tmp
= gen_reg_rtx (SImode
);
14163 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14164 part_bytes_reg
= tmp
;
14171 if (last_bytes
> 1)
14173 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14174 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14178 rtx tmp
= gen_reg_rtx (SImode
);
14179 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14180 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14181 part_bytes_reg
= tmp
;
14188 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14189 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14196 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14199 next_consecutive_mem (rtx mem
)
14201 machine_mode mode
= GET_MODE (mem
);
14202 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14203 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14205 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14208 /* Copy using LDRD/STRD instructions whenever possible.
14209 Returns true upon success. */
14211 gen_movmem_ldrd_strd (rtx
*operands
)
14213 unsigned HOST_WIDE_INT len
;
14214 HOST_WIDE_INT align
;
14215 rtx src
, dst
, base
;
14217 bool src_aligned
, dst_aligned
;
14218 bool src_volatile
, dst_volatile
;
14220 gcc_assert (CONST_INT_P (operands
[2]));
14221 gcc_assert (CONST_INT_P (operands
[3]));
14223 len
= UINTVAL (operands
[2]);
14227 /* Maximum alignment we can assume for both src and dst buffers. */
14228 align
= INTVAL (operands
[3]);
14230 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14233 /* Place src and dst addresses in registers
14234 and update the corresponding mem rtx. */
14236 dst_volatile
= MEM_VOLATILE_P (dst
);
14237 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14238 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14239 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14242 src_volatile
= MEM_VOLATILE_P (src
);
14243 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14244 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14245 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14247 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14250 if (src_volatile
|| dst_volatile
)
14253 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14254 if (!(dst_aligned
|| src_aligned
))
14255 return arm_gen_movmemqi (operands
);
14257 /* If the either src or dst is unaligned we'll be accessing it as pairs
14258 of unaligned SImode accesses. Otherwise we can generate DImode
14259 ldrd/strd instructions. */
14260 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14261 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
14266 reg0
= gen_reg_rtx (DImode
);
14267 rtx low_reg
= NULL_RTX
;
14268 rtx hi_reg
= NULL_RTX
;
14270 if (!src_aligned
|| !dst_aligned
)
14272 low_reg
= gen_lowpart (SImode
, reg0
);
14273 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14276 emit_move_insn (reg0
, src
);
14279 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14280 src
= next_consecutive_mem (src
);
14281 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
14285 emit_move_insn (dst
, reg0
);
14288 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
14289 dst
= next_consecutive_mem (dst
);
14290 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
14293 src
= next_consecutive_mem (src
);
14294 dst
= next_consecutive_mem (dst
);
14297 gcc_assert (len
< 8);
14300 /* More than a word but less than a double-word to copy. Copy a word. */
14301 reg0
= gen_reg_rtx (SImode
);
14302 src
= adjust_address (src
, SImode
, 0);
14303 dst
= adjust_address (dst
, SImode
, 0);
14305 emit_move_insn (reg0
, src
);
14307 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14310 emit_move_insn (dst
, reg0
);
14312 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14314 src
= next_consecutive_mem (src
);
14315 dst
= next_consecutive_mem (dst
);
14322 /* Copy the remaining bytes. */
14325 dst
= adjust_address (dst
, HImode
, 0);
14326 src
= adjust_address (src
, HImode
, 0);
14327 reg0
= gen_reg_rtx (SImode
);
14329 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14331 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14334 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14336 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14338 src
= next_consecutive_mem (src
);
14339 dst
= next_consecutive_mem (dst
);
14344 dst
= adjust_address (dst
, QImode
, 0);
14345 src
= adjust_address (src
, QImode
, 0);
14346 reg0
= gen_reg_rtx (QImode
);
14347 emit_move_insn (reg0
, src
);
14348 emit_move_insn (dst
, reg0
);
14352 /* Select a dominance comparison mode if possible for a test of the general
14353 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14354 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14355 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14356 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14357 In all cases OP will be either EQ or NE, but we don't need to know which
14358 here. If we are unable to support a dominance comparison we return
14359 CC mode. This will then fail to match for the RTL expressions that
14360 generate this call. */
14362 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14364 enum rtx_code cond1
, cond2
;
14367 /* Currently we will probably get the wrong result if the individual
14368 comparisons are not simple. This also ensures that it is safe to
14369 reverse a comparison if necessary. */
14370 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14372 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14376 /* The if_then_else variant of this tests the second condition if the
14377 first passes, but is true if the first fails. Reverse the first
14378 condition to get a true "inclusive-or" expression. */
14379 if (cond_or
== DOM_CC_NX_OR_Y
)
14380 cond1
= reverse_condition (cond1
);
14382 /* If the comparisons are not equal, and one doesn't dominate the other,
14383 then we can't do this. */
14385 && !comparison_dominates_p (cond1
, cond2
)
14386 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14390 std::swap (cond1
, cond2
);
14395 if (cond_or
== DOM_CC_X_AND_Y
)
14400 case EQ
: return CC_DEQmode
;
14401 case LE
: return CC_DLEmode
;
14402 case LEU
: return CC_DLEUmode
;
14403 case GE
: return CC_DGEmode
;
14404 case GEU
: return CC_DGEUmode
;
14405 default: gcc_unreachable ();
14409 if (cond_or
== DOM_CC_X_AND_Y
)
14421 gcc_unreachable ();
14425 if (cond_or
== DOM_CC_X_AND_Y
)
14437 gcc_unreachable ();
14441 if (cond_or
== DOM_CC_X_AND_Y
)
14442 return CC_DLTUmode
;
14447 return CC_DLTUmode
;
14449 return CC_DLEUmode
;
14453 gcc_unreachable ();
14457 if (cond_or
== DOM_CC_X_AND_Y
)
14458 return CC_DGTUmode
;
14463 return CC_DGTUmode
;
14465 return CC_DGEUmode
;
14469 gcc_unreachable ();
14472 /* The remaining cases only occur when both comparisons are the
14475 gcc_assert (cond1
== cond2
);
14479 gcc_assert (cond1
== cond2
);
14483 gcc_assert (cond1
== cond2
);
14487 gcc_assert (cond1
== cond2
);
14488 return CC_DLEUmode
;
14491 gcc_assert (cond1
== cond2
);
14492 return CC_DGEUmode
;
14495 gcc_unreachable ();
14500 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14502 /* All floating point compares return CCFP if it is an equality
14503 comparison, and CCFPE otherwise. */
14504 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14527 gcc_unreachable ();
14531 /* A compare with a shifted operand. Because of canonicalization, the
14532 comparison will have to be swapped when we emit the assembler. */
14533 if (GET_MODE (y
) == SImode
14534 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14535 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14536 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14537 || GET_CODE (x
) == ROTATERT
))
14540 /* This operation is performed swapped, but since we only rely on the Z
14541 flag we don't need an additional mode. */
14542 if (GET_MODE (y
) == SImode
14543 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14544 && GET_CODE (x
) == NEG
14545 && (op
== EQ
|| op
== NE
))
14548 /* This is a special case that is used by combine to allow a
14549 comparison of a shifted byte load to be split into a zero-extend
14550 followed by a comparison of the shifted integer (only valid for
14551 equalities and unsigned inequalities). */
14552 if (GET_MODE (x
) == SImode
14553 && GET_CODE (x
) == ASHIFT
14554 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14555 && GET_CODE (XEXP (x
, 0)) == SUBREG
14556 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14557 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14558 && (op
== EQ
|| op
== NE
14559 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14560 && CONST_INT_P (y
))
14563 /* A construct for a conditional compare, if the false arm contains
14564 0, then both conditions must be true, otherwise either condition
14565 must be true. Not all conditions are possible, so CCmode is
14566 returned if it can't be done. */
14567 if (GET_CODE (x
) == IF_THEN_ELSE
14568 && (XEXP (x
, 2) == const0_rtx
14569 || XEXP (x
, 2) == const1_rtx
)
14570 && COMPARISON_P (XEXP (x
, 0))
14571 && COMPARISON_P (XEXP (x
, 1)))
14572 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14573 INTVAL (XEXP (x
, 2)));
14575 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14576 if (GET_CODE (x
) == AND
14577 && (op
== EQ
|| op
== NE
)
14578 && COMPARISON_P (XEXP (x
, 0))
14579 && COMPARISON_P (XEXP (x
, 1)))
14580 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14583 if (GET_CODE (x
) == IOR
14584 && (op
== EQ
|| op
== NE
)
14585 && COMPARISON_P (XEXP (x
, 0))
14586 && COMPARISON_P (XEXP (x
, 1)))
14587 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14590 /* An operation (on Thumb) where we want to test for a single bit.
14591 This is done by shifting that bit up into the top bit of a
14592 scratch register; we can then branch on the sign bit. */
14594 && GET_MODE (x
) == SImode
14595 && (op
== EQ
|| op
== NE
)
14596 && GET_CODE (x
) == ZERO_EXTRACT
14597 && XEXP (x
, 1) == const1_rtx
)
14600 /* An operation that sets the condition codes as a side-effect, the
14601 V flag is not set correctly, so we can only use comparisons where
14602 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14604 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14605 if (GET_MODE (x
) == SImode
14607 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14608 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14609 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14610 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14611 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14612 || GET_CODE (x
) == LSHIFTRT
14613 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14614 || GET_CODE (x
) == ROTATERT
14615 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14616 return CC_NOOVmode
;
14618 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14621 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14622 && GET_CODE (x
) == PLUS
14623 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14626 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14632 /* A DImode comparison against zero can be implemented by
14633 or'ing the two halves together. */
14634 if (y
== const0_rtx
)
14637 /* We can do an equality test in three Thumb instructions. */
14647 /* DImode unsigned comparisons can be implemented by cmp +
14648 cmpeq without a scratch register. Not worth doing in
14659 /* DImode signed and unsigned comparisons can be implemented
14660 by cmp + sbcs with a scratch register, but that does not
14661 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14662 gcc_assert (op
!= EQ
&& op
!= NE
);
14666 gcc_unreachable ();
14670 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14671 return GET_MODE (x
);
14676 /* X and Y are two things to compare using CODE. Emit the compare insn and
14677 return the rtx for register 0 in the proper mode. FP means this is a
14678 floating point compare: I don't think that it is needed on the arm. */
14680 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14684 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14686 /* We might have X as a constant, Y as a register because of the predicates
14687 used for cmpdi. If so, force X to a register here. */
14688 if (dimode_comparison
&& !REG_P (x
))
14689 x
= force_reg (DImode
, x
);
14691 mode
= SELECT_CC_MODE (code
, x
, y
);
14692 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14694 if (dimode_comparison
14695 && mode
!= CC_CZmode
)
14699 /* To compare two non-zero values for equality, XOR them and
14700 then compare against zero. Not used for ARM mode; there
14701 CC_CZmode is cheaper. */
14702 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14704 gcc_assert (!reload_completed
);
14705 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14709 /* A scratch register is required. */
14710 if (reload_completed
)
14711 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14713 scratch
= gen_rtx_SCRATCH (SImode
);
14715 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14716 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14717 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
14720 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14725 /* Generate a sequence of insns that will generate the correct return
14726 address mask depending on the physical architecture that the program
14729 arm_gen_return_addr_mask (void)
14731 rtx reg
= gen_reg_rtx (Pmode
);
14733 emit_insn (gen_return_addr_mask (reg
));
14738 arm_reload_in_hi (rtx
*operands
)
14740 rtx ref
= operands
[1];
14742 HOST_WIDE_INT offset
= 0;
14744 if (GET_CODE (ref
) == SUBREG
)
14746 offset
= SUBREG_BYTE (ref
);
14747 ref
= SUBREG_REG (ref
);
14752 /* We have a pseudo which has been spilt onto the stack; there
14753 are two cases here: the first where there is a simple
14754 stack-slot replacement and a second where the stack-slot is
14755 out of range, or is used as a subreg. */
14756 if (reg_equiv_mem (REGNO (ref
)))
14758 ref
= reg_equiv_mem (REGNO (ref
));
14759 base
= find_replacement (&XEXP (ref
, 0));
14762 /* The slot is out of range, or was dressed up in a SUBREG. */
14763 base
= reg_equiv_address (REGNO (ref
));
14765 /* PR 62554: If there is no equivalent memory location then just move
14766 the value as an SImode register move. This happens when the target
14767 architecture variant does not have an HImode register move. */
14770 gcc_assert (REG_P (operands
[0]));
14771 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14772 gen_rtx_SUBREG (SImode
, ref
, 0)));
14777 base
= find_replacement (&XEXP (ref
, 0));
14779 /* Handle the case where the address is too complex to be offset by 1. */
14780 if (GET_CODE (base
) == MINUS
14781 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14783 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14785 emit_set_insn (base_plus
, base
);
14788 else if (GET_CODE (base
) == PLUS
)
14790 /* The addend must be CONST_INT, or we would have dealt with it above. */
14791 HOST_WIDE_INT hi
, lo
;
14793 offset
+= INTVAL (XEXP (base
, 1));
14794 base
= XEXP (base
, 0);
14796 /* Rework the address into a legal sequence of insns. */
14797 /* Valid range for lo is -4095 -> 4095 */
14800 : -((-offset
) & 0xfff));
14802 /* Corner case, if lo is the max offset then we would be out of range
14803 once we have added the additional 1 below, so bump the msb into the
14804 pre-loading insn(s). */
14808 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14809 ^ (HOST_WIDE_INT
) 0x80000000)
14810 - (HOST_WIDE_INT
) 0x80000000);
14812 gcc_assert (hi
+ lo
== offset
);
14816 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14818 /* Get the base address; addsi3 knows how to handle constants
14819 that require more than one insn. */
14820 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14826 /* Operands[2] may overlap operands[0] (though it won't overlap
14827 operands[1]), that's why we asked for a DImode reg -- so we can
14828 use the bit that does not overlap. */
14829 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14830 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14832 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14834 emit_insn (gen_zero_extendqisi2 (scratch
,
14835 gen_rtx_MEM (QImode
,
14836 plus_constant (Pmode
, base
,
14838 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14839 gen_rtx_MEM (QImode
,
14840 plus_constant (Pmode
, base
,
14842 if (!BYTES_BIG_ENDIAN
)
14843 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14844 gen_rtx_IOR (SImode
,
14847 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14851 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14852 gen_rtx_IOR (SImode
,
14853 gen_rtx_ASHIFT (SImode
, scratch
,
14855 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14858 /* Handle storing a half-word to memory during reload by synthesizing as two
14859 byte stores. Take care not to clobber the input values until after we
14860 have moved them somewhere safe. This code assumes that if the DImode
14861 scratch in operands[2] overlaps either the input value or output address
14862 in some way, then that value must die in this insn (we absolutely need
14863 two scratch registers for some corner cases). */
14865 arm_reload_out_hi (rtx
*operands
)
14867 rtx ref
= operands
[0];
14868 rtx outval
= operands
[1];
14870 HOST_WIDE_INT offset
= 0;
14872 if (GET_CODE (ref
) == SUBREG
)
14874 offset
= SUBREG_BYTE (ref
);
14875 ref
= SUBREG_REG (ref
);
14880 /* We have a pseudo which has been spilt onto the stack; there
14881 are two cases here: the first where there is a simple
14882 stack-slot replacement and a second where the stack-slot is
14883 out of range, or is used as a subreg. */
14884 if (reg_equiv_mem (REGNO (ref
)))
14886 ref
= reg_equiv_mem (REGNO (ref
));
14887 base
= find_replacement (&XEXP (ref
, 0));
14890 /* The slot is out of range, or was dressed up in a SUBREG. */
14891 base
= reg_equiv_address (REGNO (ref
));
14893 /* PR 62254: If there is no equivalent memory location then just move
14894 the value as an SImode register move. This happens when the target
14895 architecture variant does not have an HImode register move. */
14898 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
14900 if (REG_P (outval
))
14902 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14903 gen_rtx_SUBREG (SImode
, outval
, 0)));
14905 else /* SUBREG_P (outval) */
14907 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
14908 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14909 SUBREG_REG (outval
)));
14911 /* FIXME: Handle other cases ? */
14912 gcc_unreachable ();
14918 base
= find_replacement (&XEXP (ref
, 0));
14920 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14922 /* Handle the case where the address is too complex to be offset by 1. */
14923 if (GET_CODE (base
) == MINUS
14924 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14926 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14928 /* Be careful not to destroy OUTVAL. */
14929 if (reg_overlap_mentioned_p (base_plus
, outval
))
14931 /* Updating base_plus might destroy outval, see if we can
14932 swap the scratch and base_plus. */
14933 if (!reg_overlap_mentioned_p (scratch
, outval
))
14934 std::swap (scratch
, base_plus
);
14937 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14939 /* Be conservative and copy OUTVAL into the scratch now,
14940 this should only be necessary if outval is a subreg
14941 of something larger than a word. */
14942 /* XXX Might this clobber base? I can't see how it can,
14943 since scratch is known to overlap with OUTVAL, and
14944 must be wider than a word. */
14945 emit_insn (gen_movhi (scratch_hi
, outval
));
14946 outval
= scratch_hi
;
14950 emit_set_insn (base_plus
, base
);
14953 else if (GET_CODE (base
) == PLUS
)
14955 /* The addend must be CONST_INT, or we would have dealt with it above. */
14956 HOST_WIDE_INT hi
, lo
;
14958 offset
+= INTVAL (XEXP (base
, 1));
14959 base
= XEXP (base
, 0);
14961 /* Rework the address into a legal sequence of insns. */
14962 /* Valid range for lo is -4095 -> 4095 */
14965 : -((-offset
) & 0xfff));
14967 /* Corner case, if lo is the max offset then we would be out of range
14968 once we have added the additional 1 below, so bump the msb into the
14969 pre-loading insn(s). */
14973 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14974 ^ (HOST_WIDE_INT
) 0x80000000)
14975 - (HOST_WIDE_INT
) 0x80000000);
14977 gcc_assert (hi
+ lo
== offset
);
14981 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14983 /* Be careful not to destroy OUTVAL. */
14984 if (reg_overlap_mentioned_p (base_plus
, outval
))
14986 /* Updating base_plus might destroy outval, see if we
14987 can swap the scratch and base_plus. */
14988 if (!reg_overlap_mentioned_p (scratch
, outval
))
14989 std::swap (scratch
, base_plus
);
14992 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14994 /* Be conservative and copy outval into scratch now,
14995 this should only be necessary if outval is a
14996 subreg of something larger than a word. */
14997 /* XXX Might this clobber base? I can't see how it
14998 can, since scratch is known to overlap with
15000 emit_insn (gen_movhi (scratch_hi
, outval
));
15001 outval
= scratch_hi
;
15005 /* Get the base address; addsi3 knows how to handle constants
15006 that require more than one insn. */
15007 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15013 if (BYTES_BIG_ENDIAN
)
15015 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15016 plus_constant (Pmode
, base
,
15018 gen_lowpart (QImode
, outval
)));
15019 emit_insn (gen_lshrsi3 (scratch
,
15020 gen_rtx_SUBREG (SImode
, outval
, 0),
15022 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15024 gen_lowpart (QImode
, scratch
)));
15028 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15030 gen_lowpart (QImode
, outval
)));
15031 emit_insn (gen_lshrsi3 (scratch
,
15032 gen_rtx_SUBREG (SImode
, outval
, 0),
15034 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15035 plus_constant (Pmode
, base
,
15037 gen_lowpart (QImode
, scratch
)));
15041 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15042 (padded to the size of a word) should be passed in a register. */
15045 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15047 if (TARGET_AAPCS_BASED
)
15048 return must_pass_in_stack_var_size (mode
, type
);
15050 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15054 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15055 Return true if an argument passed on the stack should be padded upwards,
15056 i.e. if the least-significant byte has useful data.
15057 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15058 aggregate types are placed in the lowest memory address. */
15061 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15063 if (!TARGET_AAPCS_BASED
)
15064 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15066 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15073 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15074 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15075 register has useful data, and return the opposite if the most
15076 significant byte does. */
15079 arm_pad_reg_upward (machine_mode mode
,
15080 tree type
, int first ATTRIBUTE_UNUSED
)
15082 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15084 /* For AAPCS, small aggregates, small fixed-point types,
15085 and small complex types are always padded upwards. */
15088 if ((AGGREGATE_TYPE_P (type
)
15089 || TREE_CODE (type
) == COMPLEX_TYPE
15090 || FIXED_POINT_TYPE_P (type
))
15091 && int_size_in_bytes (type
) <= 4)
15096 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15097 && GET_MODE_SIZE (mode
) <= 4)
15102 /* Otherwise, use default padding. */
15103 return !BYTES_BIG_ENDIAN
;
15106 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15107 assuming that the address in the base register is word aligned. */
15109 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15111 HOST_WIDE_INT max_offset
;
15113 /* Offset must be a multiple of 4 in Thumb mode. */
15114 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15119 else if (TARGET_ARM
)
15124 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15127 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15128 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15129 Assumes that the address in the base register RN is word aligned. Pattern
15130 guarantees that both memory accesses use the same base register,
15131 the offsets are constants within the range, and the gap between the offsets is 4.
15132 If preload complete then check that registers are legal. WBACK indicates whether
15133 address is updated. LOAD indicates whether memory access is load or store. */
15135 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15136 bool wback
, bool load
)
15138 unsigned int t
, t2
, n
;
15140 if (!reload_completed
)
15143 if (!offset_ok_for_ldrd_strd (offset
))
15150 if ((TARGET_THUMB2
)
15151 && ((wback
&& (n
== t
|| n
== t2
))
15152 || (t
== SP_REGNUM
)
15153 || (t
== PC_REGNUM
)
15154 || (t2
== SP_REGNUM
)
15155 || (t2
== PC_REGNUM
)
15156 || (!load
&& (n
== PC_REGNUM
))
15157 || (load
&& (t
== t2
))
15158 /* Triggers Cortex-M3 LDRD errata. */
15159 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15163 && ((wback
&& (n
== t
|| n
== t2
))
15164 || (t2
== PC_REGNUM
)
15165 || (t
% 2 != 0) /* First destination register is not even. */
15167 /* PC can be used as base register (for offset addressing only),
15168 but it is depricated. */
15169 || (n
== PC_REGNUM
)))
15175 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15176 operand MEM's address contains an immediate offset from the base
15177 register and has no side effects, in which case it sets BASE and
15178 OFFSET accordingly. */
15180 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15184 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15186 /* TODO: Handle more general memory operand patterns, such as
15187 PRE_DEC and PRE_INC. */
15189 if (side_effects_p (mem
))
15192 /* Can't deal with subregs. */
15193 if (GET_CODE (mem
) == SUBREG
)
15196 gcc_assert (MEM_P (mem
));
15198 *offset
= const0_rtx
;
15200 addr
= XEXP (mem
, 0);
15202 /* If addr isn't valid for DImode, then we can't handle it. */
15203 if (!arm_legitimate_address_p (DImode
, addr
,
15204 reload_in_progress
|| reload_completed
))
15212 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15214 *base
= XEXP (addr
, 0);
15215 *offset
= XEXP (addr
, 1);
15216 return (REG_P (*base
) && CONST_INT_P (*offset
));
15222 /* Called from a peephole2 to replace two word-size accesses with a
15223 single LDRD/STRD instruction. Returns true iff we can generate a
15224 new instruction sequence. That is, both accesses use the same base
15225 register and the gap between constant offsets is 4. This function
15226 may reorder its operands to match ldrd/strd RTL templates.
15227 OPERANDS are the operands found by the peephole matcher;
15228 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15229 corresponding memory operands. LOAD indicaates whether the access
15230 is load or store. CONST_STORE indicates a store of constant
15231 integer values held in OPERANDS[4,5] and assumes that the pattern
15232 is of length 4 insn, for the purpose of checking dead registers.
15233 COMMUTE indicates that register operands may be reordered. */
15235 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15236 bool const_store
, bool commute
)
15239 HOST_WIDE_INT offsets
[2], offset
;
15240 rtx base
= NULL_RTX
;
15241 rtx cur_base
, cur_offset
, tmp
;
15243 HARD_REG_SET regset
;
15245 gcc_assert (!const_store
|| !load
);
15246 /* Check that the memory references are immediate offsets from the
15247 same base register. Extract the base register, the destination
15248 registers, and the corresponding memory offsets. */
15249 for (i
= 0; i
< nops
; i
++)
15251 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15256 else if (REGNO (base
) != REGNO (cur_base
))
15259 offsets
[i
] = INTVAL (cur_offset
);
15260 if (GET_CODE (operands
[i
]) == SUBREG
)
15262 tmp
= SUBREG_REG (operands
[i
]);
15263 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15268 /* Make sure there is no dependency between the individual loads. */
15269 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15270 return false; /* RAW */
15272 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15273 return false; /* WAW */
15275 /* If the same input register is used in both stores
15276 when storing different constants, try to find a free register.
15277 For example, the code
15282 can be transformed into
15286 in Thumb mode assuming that r1 is free.
15287 For ARM mode do the same but only if the starting register
15288 can be made to be even. */
15290 && REGNO (operands
[0]) == REGNO (operands
[1])
15291 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15295 CLEAR_HARD_REG_SET (regset
);
15296 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15297 if (tmp
== NULL_RTX
)
15300 /* Use the new register in the first load to ensure that
15301 if the original input register is not dead after peephole,
15302 then it will have the correct constant value. */
15305 else if (TARGET_ARM
)
15307 int regno
= REGNO (operands
[0]);
15308 if (!peep2_reg_dead_p (4, operands
[0]))
15310 /* When the input register is even and is not dead after the
15311 pattern, it has to hold the second constant but we cannot
15312 form a legal STRD in ARM mode with this register as the second
15314 if (regno
% 2 == 0)
15317 /* Is regno-1 free? */
15318 SET_HARD_REG_SET (regset
);
15319 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15320 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15321 if (tmp
== NULL_RTX
)
15328 /* Find a DImode register. */
15329 CLEAR_HARD_REG_SET (regset
);
15330 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15331 if (tmp
!= NULL_RTX
)
15333 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15334 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15338 /* Can we use the input register to form a DI register? */
15339 SET_HARD_REG_SET (regset
);
15340 CLEAR_HARD_REG_BIT(regset
,
15341 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15342 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15343 if (tmp
== NULL_RTX
)
15345 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15349 gcc_assert (operands
[0] != NULL_RTX
);
15350 gcc_assert (operands
[1] != NULL_RTX
);
15351 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15352 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15356 /* Make sure the instructions are ordered with lower memory access first. */
15357 if (offsets
[0] > offsets
[1])
15359 gap
= offsets
[0] - offsets
[1];
15360 offset
= offsets
[1];
15362 /* Swap the instructions such that lower memory is accessed first. */
15363 std::swap (operands
[0], operands
[1]);
15364 std::swap (operands
[2], operands
[3]);
15366 std::swap (operands
[4], operands
[5]);
15370 gap
= offsets
[1] - offsets
[0];
15371 offset
= offsets
[0];
15374 /* Make sure accesses are to consecutive memory locations. */
15378 /* Make sure we generate legal instructions. */
15379 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15383 /* In Thumb state, where registers are almost unconstrained, there
15384 is little hope to fix it. */
15388 if (load
&& commute
)
15390 /* Try reordering registers. */
15391 std::swap (operands
[0], operands
[1]);
15392 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15399 /* If input registers are dead after this pattern, they can be
15400 reordered or replaced by other registers that are free in the
15401 current pattern. */
15402 if (!peep2_reg_dead_p (4, operands
[0])
15403 || !peep2_reg_dead_p (4, operands
[1]))
15406 /* Try to reorder the input registers. */
15407 /* For example, the code
15412 can be transformed into
15417 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15420 std::swap (operands
[0], operands
[1]);
15424 /* Try to find a free DI register. */
15425 CLEAR_HARD_REG_SET (regset
);
15426 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15427 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15430 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15431 if (tmp
== NULL_RTX
)
15434 /* DREG must be an even-numbered register in DImode.
15435 Split it into SI registers. */
15436 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15437 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15438 gcc_assert (operands
[0] != NULL_RTX
);
15439 gcc_assert (operands
[1] != NULL_RTX
);
15440 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15441 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15443 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15455 /* Print a symbolic form of X to the debug file, F. */
15457 arm_print_value (FILE *f
, rtx x
)
15459 switch (GET_CODE (x
))
15462 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15466 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15474 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15476 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15477 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15485 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15489 fprintf (f
, "`%s'", XSTR (x
, 0));
15493 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15497 arm_print_value (f
, XEXP (x
, 0));
15501 arm_print_value (f
, XEXP (x
, 0));
15503 arm_print_value (f
, XEXP (x
, 1));
15511 fprintf (f
, "????");
15516 /* Routines for manipulation of the constant pool. */
15518 /* Arm instructions cannot load a large constant directly into a
15519 register; they have to come from a pc relative load. The constant
15520 must therefore be placed in the addressable range of the pc
15521 relative load. Depending on the precise pc relative load
15522 instruction the range is somewhere between 256 bytes and 4k. This
15523 means that we often have to dump a constant inside a function, and
15524 generate code to branch around it.
15526 It is important to minimize this, since the branches will slow
15527 things down and make the code larger.
15529 Normally we can hide the table after an existing unconditional
15530 branch so that there is no interruption of the flow, but in the
15531 worst case the code looks like this:
15549 We fix this by performing a scan after scheduling, which notices
15550 which instructions need to have their operands fetched from the
15551 constant table and builds the table.
15553 The algorithm starts by building a table of all the constants that
15554 need fixing up and all the natural barriers in the function (places
15555 where a constant table can be dropped without breaking the flow).
15556 For each fixup we note how far the pc-relative replacement will be
15557 able to reach and the offset of the instruction into the function.
15559 Having built the table we then group the fixes together to form
15560 tables that are as large as possible (subject to addressing
15561 constraints) and emit each table of constants after the last
15562 barrier that is within range of all the instructions in the group.
15563 If a group does not contain a barrier, then we forcibly create one
15564 by inserting a jump instruction into the flow. Once the table has
15565 been inserted, the insns are then modified to reference the
15566 relevant entry in the pool.
15568 Possible enhancements to the algorithm (not implemented) are:
15570 1) For some processors and object formats, there may be benefit in
15571 aligning the pools to the start of cache lines; this alignment
15572 would need to be taken into account when calculating addressability
15575 /* These typedefs are located at the start of this file, so that
15576 they can be used in the prototypes there. This comment is to
15577 remind readers of that fact so that the following structures
15578 can be understood more easily.
15580 typedef struct minipool_node Mnode;
15581 typedef struct minipool_fixup Mfix; */
15583 struct minipool_node
15585 /* Doubly linked chain of entries. */
15588 /* The maximum offset into the code that this entry can be placed. While
15589 pushing fixes for forward references, all entries are sorted in order
15590 of increasing max_address. */
15591 HOST_WIDE_INT max_address
;
15592 /* Similarly for an entry inserted for a backwards ref. */
15593 HOST_WIDE_INT min_address
;
15594 /* The number of fixes referencing this entry. This can become zero
15595 if we "unpush" an entry. In this case we ignore the entry when we
15596 come to emit the code. */
15598 /* The offset from the start of the minipool. */
15599 HOST_WIDE_INT offset
;
15600 /* The value in table. */
15602 /* The mode of value. */
15604 /* The size of the value. With iWMMXt enabled
15605 sizes > 4 also imply an alignment of 8-bytes. */
15609 struct minipool_fixup
15613 HOST_WIDE_INT address
;
15619 HOST_WIDE_INT forwards
;
15620 HOST_WIDE_INT backwards
;
/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15627 static Mnode
* minipool_vector_head
;
15628 static Mnode
* minipool_vector_tail
;
15629 static rtx_code_label
*minipool_vector_label
;
15630 static int minipool_pad
;
15632 /* The linked list of all minipool fixes required for this function. */
15633 Mfix
* minipool_fix_head
;
15634 Mfix
* minipool_fix_tail
;
15635 /* The fix entry for the current minipool, once it has been placed. */
15636 Mfix
* minipool_barrier
;
/* Default: jump tables live in the data section unless the target says
   otherwise.  */
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif
15642 static HOST_WIDE_INT
15643 get_jump_table_size (rtx_jump_table_data
*insn
)
15645 /* ADDR_VECs only take room if read-only data does into the text
15647 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15649 rtx body
= PATTERN (insn
);
15650 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15651 HOST_WIDE_INT size
;
15652 HOST_WIDE_INT modesize
;
15654 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15655 size
= modesize
* XVECLEN (body
, elt
);
15659 /* Round up size of TBB table to a halfword boundary. */
15660 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
15663 /* No padding necessary for TBH. */
15666 /* Add two bytes for alignment on Thumb. */
15671 gcc_unreachable ();
15679 /* Return the maximum amount of padding that will be inserted before
15682 static HOST_WIDE_INT
15683 get_label_padding (rtx label
)
15685 HOST_WIDE_INT align
, min_insn_size
;
15687 align
= 1 << label_to_alignment (label
);
15688 min_insn_size
= TARGET_THUMB
? 2 : 4;
15689 return align
> min_insn_size
? align
- min_insn_size
: 0;
15692 /* Move a minipool fix MP from its current location to before MAX_MP.
15693 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15694 constraints may need updating. */
15696 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15697 HOST_WIDE_INT max_address
)
15699 /* The code below assumes these are different. */
15700 gcc_assert (mp
!= max_mp
);
15702 if (max_mp
== NULL
)
15704 if (max_address
< mp
->max_address
)
15705 mp
->max_address
= max_address
;
15709 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15710 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15712 mp
->max_address
= max_address
;
15714 /* Unlink MP from its current position. Since max_mp is non-null,
15715 mp->prev must be non-null. */
15716 mp
->prev
->next
= mp
->next
;
15717 if (mp
->next
!= NULL
)
15718 mp
->next
->prev
= mp
->prev
;
15720 minipool_vector_tail
= mp
->prev
;
15722 /* Re-insert it before MAX_MP. */
15724 mp
->prev
= max_mp
->prev
;
15727 if (mp
->prev
!= NULL
)
15728 mp
->prev
->next
= mp
;
15730 minipool_vector_head
= mp
;
15733 /* Save the new entry. */
15736 /* Scan over the preceding entries and adjust their addresses as
15738 while (mp
->prev
!= NULL
15739 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15741 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15748 /* Add a constant to the minipool for a forward reference. Returns the
15749 node added or NULL if the constant will not fit in this pool. */
15751 add_minipool_forward_ref (Mfix
*fix
)
15753 /* If set, max_mp is the first pool_entry that has a lower
15754 constraint than the one we are trying to add. */
15755 Mnode
* max_mp
= NULL
;
15756 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15759 /* If the minipool starts before the end of FIX->INSN then this FIX
15760 can not be placed into the current pool. Furthermore, adding the
15761 new constant pool entry may cause the pool to start FIX_SIZE bytes
15763 if (minipool_vector_head
&&
15764 (fix
->address
+ get_attr_length (fix
->insn
)
15765 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15768 /* Scan the pool to see if a constant with the same value has
15769 already been added. While we are doing this, also note the
15770 location where we must insert the constant if it doesn't already
15772 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15774 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15775 && fix
->mode
== mp
->mode
15776 && (!LABEL_P (fix
->value
)
15777 || (CODE_LABEL_NUMBER (fix
->value
)
15778 == CODE_LABEL_NUMBER (mp
->value
)))
15779 && rtx_equal_p (fix
->value
, mp
->value
))
15781 /* More than one fix references this entry. */
15783 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15786 /* Note the insertion point if necessary. */
15788 && mp
->max_address
> max_address
)
15791 /* If we are inserting an 8-bytes aligned quantity and
15792 we have not already found an insertion point, then
15793 make sure that all such 8-byte aligned quantities are
15794 placed at the start of the pool. */
15795 if (ARM_DOUBLEWORD_ALIGN
15797 && fix
->fix_size
>= 8
15798 && mp
->fix_size
< 8)
15801 max_address
= mp
->max_address
;
15805 /* The value is not currently in the minipool, so we need to create
15806 a new entry for it. If MAX_MP is NULL, the entry will be put on
15807 the end of the list since the placement is less constrained than
15808 any existing entry. Otherwise, we insert the new fix before
15809 MAX_MP and, if necessary, adjust the constraints on the other
15812 mp
->fix_size
= fix
->fix_size
;
15813 mp
->mode
= fix
->mode
;
15814 mp
->value
= fix
->value
;
15816 /* Not yet required for a backwards ref. */
15817 mp
->min_address
= -65536;
15819 if (max_mp
== NULL
)
15821 mp
->max_address
= max_address
;
15823 mp
->prev
= minipool_vector_tail
;
15825 if (mp
->prev
== NULL
)
15827 minipool_vector_head
= mp
;
15828 minipool_vector_label
= gen_label_rtx ();
15831 mp
->prev
->next
= mp
;
15833 minipool_vector_tail
= mp
;
15837 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15838 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15840 mp
->max_address
= max_address
;
15843 mp
->prev
= max_mp
->prev
;
15845 if (mp
->prev
!= NULL
)
15846 mp
->prev
->next
= mp
;
15848 minipool_vector_head
= mp
;
15851 /* Save the new entry. */
15854 /* Scan over the preceding entries and adjust their addresses as
15856 while (mp
->prev
!= NULL
15857 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15859 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15867 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15868 HOST_WIDE_INT min_address
)
15870 HOST_WIDE_INT offset
;
15872 /* The code below assumes these are different. */
15873 gcc_assert (mp
!= min_mp
);
15875 if (min_mp
== NULL
)
15877 if (min_address
> mp
->min_address
)
15878 mp
->min_address
= min_address
;
15882 /* We will adjust this below if it is too loose. */
15883 mp
->min_address
= min_address
;
15885 /* Unlink MP from its current position. Since min_mp is non-null,
15886 mp->next must be non-null. */
15887 mp
->next
->prev
= mp
->prev
;
15888 if (mp
->prev
!= NULL
)
15889 mp
->prev
->next
= mp
->next
;
15891 minipool_vector_head
= mp
->next
;
15893 /* Reinsert it after MIN_MP. */
15895 mp
->next
= min_mp
->next
;
15897 if (mp
->next
!= NULL
)
15898 mp
->next
->prev
= mp
;
15900 minipool_vector_tail
= mp
;
15906 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15908 mp
->offset
= offset
;
15909 if (mp
->refcount
> 0)
15910 offset
+= mp
->fix_size
;
15912 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
15913 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
15919 /* Add a constant to the minipool for a backward reference. Returns the
15920 node added or NULL if the constant will not fit in this pool.
15922 Note that the code for insertion for a backwards reference can be
15923 somewhat confusing because the calculated offsets for each fix do
15924 not take into account the size of the pool (which is still under
15927 add_minipool_backward_ref (Mfix
*fix
)
15929 /* If set, min_mp is the last pool_entry that has a lower constraint
15930 than the one we are trying to add. */
15931 Mnode
*min_mp
= NULL
;
15932 /* This can be negative, since it is only a constraint. */
15933 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
15936 /* If we can't reach the current pool from this insn, or if we can't
15937 insert this entry at the end of the pool without pushing other
15938 fixes out of range, then we don't try. This ensures that we
15939 can't fail later on. */
15940 if (min_address
>= minipool_barrier
->address
15941 || (minipool_vector_tail
->min_address
+ fix
->fix_size
15942 >= minipool_barrier
->address
))
15945 /* Scan the pool to see if a constant with the same value has
15946 already been added. While we are doing this, also note the
15947 location where we must insert the constant if it doesn't already
15949 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
15951 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15952 && fix
->mode
== mp
->mode
15953 && (!LABEL_P (fix
->value
)
15954 || (CODE_LABEL_NUMBER (fix
->value
)
15955 == CODE_LABEL_NUMBER (mp
->value
)))
15956 && rtx_equal_p (fix
->value
, mp
->value
)
15957 /* Check that there is enough slack to move this entry to the
15958 end of the table (this is conservative). */
15959 && (mp
->max_address
15960 > (minipool_barrier
->address
15961 + minipool_vector_tail
->offset
15962 + minipool_vector_tail
->fix_size
)))
15965 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
15968 if (min_mp
!= NULL
)
15969 mp
->min_address
+= fix
->fix_size
;
15972 /* Note the insertion point if necessary. */
15973 if (mp
->min_address
< min_address
)
15975 /* For now, we do not allow the insertion of 8-byte alignment
15976 requiring nodes anywhere but at the start of the pool. */
15977 if (ARM_DOUBLEWORD_ALIGN
15978 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
15983 else if (mp
->max_address
15984 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
15986 /* Inserting before this entry would push the fix beyond
15987 its maximum address (which can happen if we have
15988 re-located a forwards fix); force the new fix to come
15990 if (ARM_DOUBLEWORD_ALIGN
15991 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
15996 min_address
= mp
->min_address
+ fix
->fix_size
;
15999 /* Do not insert a non-8-byte aligned quantity before 8-byte
16000 aligned quantities. */
16001 else if (ARM_DOUBLEWORD_ALIGN
16002 && fix
->fix_size
< 8
16003 && mp
->fix_size
>= 8)
16006 min_address
= mp
->min_address
+ fix
->fix_size
;
16011 /* We need to create a new entry. */
16013 mp
->fix_size
= fix
->fix_size
;
16014 mp
->mode
= fix
->mode
;
16015 mp
->value
= fix
->value
;
16017 mp
->max_address
= minipool_barrier
->address
+ 65536;
16019 mp
->min_address
= min_address
;
16021 if (min_mp
== NULL
)
16024 mp
->next
= minipool_vector_head
;
16026 if (mp
->next
== NULL
)
16028 minipool_vector_tail
= mp
;
16029 minipool_vector_label
= gen_label_rtx ();
16032 mp
->next
->prev
= mp
;
16034 minipool_vector_head
= mp
;
16038 mp
->next
= min_mp
->next
;
16042 if (mp
->next
!= NULL
)
16043 mp
->next
->prev
= mp
;
16045 minipool_vector_tail
= mp
;
16048 /* Save the new entry. */
16056 /* Scan over the following entries and adjust their offsets. */
16057 while (mp
->next
!= NULL
)
16059 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16060 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16063 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16065 mp
->next
->offset
= mp
->offset
;
16074 assign_minipool_offsets (Mfix
*barrier
)
16076 HOST_WIDE_INT offset
= 0;
16079 minipool_barrier
= barrier
;
16081 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16083 mp
->offset
= offset
;
16085 if (mp
->refcount
> 0)
16086 offset
+= mp
->fix_size
;
16090 /* Output the literal table */
16092 dump_minipool (rtx_insn
*scan
)
16098 if (ARM_DOUBLEWORD_ALIGN
)
16099 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16100 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16107 fprintf (dump_file
,
16108 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16109 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16111 scan
= emit_label_after (gen_label_rtx (), scan
);
16112 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16113 scan
= emit_label_after (minipool_vector_label
, scan
);
16115 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16117 if (mp
->refcount
> 0)
16121 fprintf (dump_file
,
16122 ";; Offset %u, min %ld, max %ld ",
16123 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16124 (unsigned long) mp
->max_address
);
16125 arm_print_value (dump_file
, mp
->value
);
16126 fputc ('\n', dump_file
);
16129 rtx val
= copy_rtx (mp
->value
);
16131 switch (GET_MODE_SIZE (mp
->mode
))
16133 #ifdef HAVE_consttable_1
16135 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
16139 #ifdef HAVE_consttable_2
16141 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
16145 #ifdef HAVE_consttable_4
16147 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
16151 #ifdef HAVE_consttable_8
16153 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
16157 #ifdef HAVE_consttable_16
16159 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
16164 gcc_unreachable ();
16172 minipool_vector_head
= minipool_vector_tail
= NULL
;
16173 scan
= emit_insn_after (gen_consttable_end (), scan
);
16174 scan
= emit_barrier_after (scan
);
16177 /* Return the cost of forcibly inserting a barrier after INSN. */
16179 arm_barrier_cost (rtx_insn
*insn
)
16181 /* Basing the location of the pool on the loop depth is preferable,
16182 but at the moment, the basic block information seems to be
16183 corrupt by this stage of the compilation. */
16184 int base_cost
= 50;
16185 rtx_insn
*next
= next_nonnote_insn (insn
);
16187 if (next
!= NULL
&& LABEL_P (next
))
16190 switch (GET_CODE (insn
))
16193 /* It will always be better to place the table before the label, rather
16202 return base_cost
- 10;
16205 return base_cost
+ 10;
16209 /* Find the best place in the insn stream in the range
16210 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16211 Create the barrier by inserting a jump and add a new fix entry for
16214 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16216 HOST_WIDE_INT count
= 0;
16217 rtx_barrier
*barrier
;
16218 rtx_insn
*from
= fix
->insn
;
16219 /* The instruction after which we will insert the jump. */
16220 rtx_insn
*selected
= NULL
;
16222 /* The address at which the jump instruction will be placed. */
16223 HOST_WIDE_INT selected_address
;
16225 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16226 rtx_code_label
*label
= gen_label_rtx ();
16228 selected_cost
= arm_barrier_cost (from
);
16229 selected_address
= fix
->address
;
16231 while (from
&& count
< max_count
)
16233 rtx_jump_table_data
*tmp
;
16236 /* This code shouldn't have been called if there was a natural barrier
16238 gcc_assert (!BARRIER_P (from
));
16240 /* Count the length of this insn. This must stay in sync with the
16241 code that pushes minipool fixes. */
16242 if (LABEL_P (from
))
16243 count
+= get_label_padding (from
);
16245 count
+= get_attr_length (from
);
16247 /* If there is a jump table, add its length. */
16248 if (tablejump_p (from
, NULL
, &tmp
))
16250 count
+= get_jump_table_size (tmp
);
16252 /* Jump tables aren't in a basic block, so base the cost on
16253 the dispatch insn. If we select this location, we will
16254 still put the pool after the table. */
16255 new_cost
= arm_barrier_cost (from
);
16257 if (count
< max_count
16258 && (!selected
|| new_cost
<= selected_cost
))
16261 selected_cost
= new_cost
;
16262 selected_address
= fix
->address
+ count
;
16265 /* Continue after the dispatch table. */
16266 from
= NEXT_INSN (tmp
);
16270 new_cost
= arm_barrier_cost (from
);
16272 if (count
< max_count
16273 && (!selected
|| new_cost
<= selected_cost
))
16276 selected_cost
= new_cost
;
16277 selected_address
= fix
->address
+ count
;
16280 from
= NEXT_INSN (from
);
16283 /* Make sure that we found a place to insert the jump. */
16284 gcc_assert (selected
);
16286 /* Make sure we do not split a call and its corresponding
16287 CALL_ARG_LOCATION note. */
16288 if (CALL_P (selected
))
16290 rtx_insn
*next
= NEXT_INSN (selected
);
16291 if (next
&& NOTE_P (next
)
16292 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16296 /* Create a new JUMP_INSN that branches around a barrier. */
16297 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16298 JUMP_LABEL (from
) = label
;
16299 barrier
= emit_barrier_after (from
);
16300 emit_label_after (label
, barrier
);
16302 /* Create a minipool barrier entry for the new barrier. */
16303 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16304 new_fix
->insn
= barrier
;
16305 new_fix
->address
= selected_address
;
16306 new_fix
->next
= fix
->next
;
16307 fix
->next
= new_fix
;
16312 /* Record that there is a natural barrier in the insn stream at
16315 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16317 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16320 fix
->address
= address
;
16323 if (minipool_fix_head
!= NULL
)
16324 minipool_fix_tail
->next
= fix
;
16326 minipool_fix_head
= fix
;
16328 minipool_fix_tail
= fix
;
16331 /* Record INSN, which will need fixing up to load a value from the
16332 minipool. ADDRESS is the offset of the insn since the start of the
16333 function; LOC is a pointer to the part of the insn which requires
16334 fixing; VALUE is the constant that must be loaded, which is of type
16337 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16338 machine_mode mode
, rtx value
)
16340 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16343 fix
->address
= address
;
16346 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16347 fix
->value
= value
;
16348 fix
->forwards
= get_attr_pool_range (insn
);
16349 fix
->backwards
= get_attr_neg_pool_range (insn
);
16350 fix
->minipool
= NULL
;
16352 /* If an insn doesn't have a range defined for it, then it isn't
16353 expecting to be reworked by this code. Better to stop now than
16354 to generate duff assembly code. */
16355 gcc_assert (fix
->forwards
|| fix
->backwards
);
16357 /* If an entry requires 8-byte alignment then assume all constant pools
16358 require 4 bytes of padding. Trying to do this later on a per-pool
16359 basis is awkward because existing pool entries have to be modified. */
16360 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16365 fprintf (dump_file
,
16366 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16367 GET_MODE_NAME (mode
),
16368 INSN_UID (insn
), (unsigned long) address
,
16369 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16370 arm_print_value (dump_file
, fix
->value
);
16371 fprintf (dump_file
, "\n");
16374 /* Add it to the chain of fixes. */
16377 if (minipool_fix_head
!= NULL
)
16378 minipool_fix_tail
->next
= fix
;
16380 minipool_fix_head
= fix
;
16382 minipool_fix_tail
= fix
;
16385 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16386 Returns the number of insns needed, or 99 if we always want to synthesize
16389 arm_max_const_double_inline_cost ()
16391 /* Let the value get synthesized to avoid the use of literal pools. */
16392 if (arm_disable_literal_pool
)
16395 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16398 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16399 Returns the number of insns needed, or 99 if we don't know how to
16402 arm_const_double_inline_cost (rtx val
)
16404 rtx lowpart
, highpart
;
16407 mode
= GET_MODE (val
);
16409 if (mode
== VOIDmode
)
16412 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16414 lowpart
= gen_lowpart (SImode
, val
);
16415 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16417 gcc_assert (CONST_INT_P (lowpart
));
16418 gcc_assert (CONST_INT_P (highpart
));
16420 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16421 NULL_RTX
, NULL_RTX
, 0, 0)
16422 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16423 NULL_RTX
, NULL_RTX
, 0, 0));
16426 /* Cost of loading a SImode constant. */
16428 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16430 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16431 NULL_RTX
, NULL_RTX
, 1, 0);
16434 /* Return true if it is worthwhile to split a 64-bit constant into two
16435 32-bit operations. This is the case if optimizing for size, or
16436 if we have load delay slots, or if one 32-bit part can be done with
16437 a single data operation. */
16439 arm_const_double_by_parts (rtx val
)
16441 machine_mode mode
= GET_MODE (val
);
16444 if (optimize_size
|| arm_ld_sched
)
16447 if (mode
== VOIDmode
)
16450 part
= gen_highpart_mode (SImode
, mode
, val
);
16452 gcc_assert (CONST_INT_P (part
));
16454 if (const_ok_for_arm (INTVAL (part
))
16455 || const_ok_for_arm (~INTVAL (part
)))
16458 part
= gen_lowpart (SImode
, val
);
16460 gcc_assert (CONST_INT_P (part
));
16462 if (const_ok_for_arm (INTVAL (part
))
16463 || const_ok_for_arm (~INTVAL (part
)))
16469 /* Return true if it is possible to inline both the high and low parts
16470 of a 64-bit constant into 32-bit data processing instructions. */
16472 arm_const_double_by_immediates (rtx val
)
16474 machine_mode mode
= GET_MODE (val
);
16477 if (mode
== VOIDmode
)
16480 part
= gen_highpart_mode (SImode
, mode
, val
);
16482 gcc_assert (CONST_INT_P (part
));
16484 if (!const_ok_for_arm (INTVAL (part
)))
16487 part
= gen_lowpart (SImode
, val
);
16489 gcc_assert (CONST_INT_P (part
));
16491 if (!const_ok_for_arm (INTVAL (part
)))
16497 /* Scan INSN and note any of its operands that need fixing.
16498 If DO_PUSHES is false we do not actually push any of the fixups
16501 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16505 extract_constrain_insn (insn
);
16507 if (recog_data
.n_alternatives
== 0)
16510 /* Fill in recog_op_alt with information about the constraints of
16512 preprocess_constraints (insn
);
16514 const operand_alternative
*op_alt
= which_op_alt ();
16515 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16517 /* Things we need to fix can only occur in inputs. */
16518 if (recog_data
.operand_type
[opno
] != OP_IN
)
16521 /* If this alternative is a memory reference, then any mention
16522 of constants in this alternative is really to fool reload
16523 into allowing us to accept one there. We need to fix them up
16524 now so that we output the right code. */
16525 if (op_alt
[opno
].memory_ok
)
16527 rtx op
= recog_data
.operand
[opno
];
16529 if (CONSTANT_P (op
))
16532 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16533 recog_data
.operand_mode
[opno
], op
);
16535 else if (MEM_P (op
)
16536 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16537 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16541 rtx cop
= avoid_constant_pool_reference (op
);
16543 /* Casting the address of something to a mode narrower
16544 than a word can cause avoid_constant_pool_reference()
16545 to return the pool reference itself. That's no good to
16546 us here. Lets just hope that we can use the
16547 constant pool value directly. */
16549 cop
= get_pool_constant (XEXP (op
, 0));
16551 push_minipool_fix (insn
, address
,
16552 recog_data
.operand_loc
[opno
],
16553 recog_data
.operand_mode
[opno
], cop
);
16563 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16564 and unions in the context of ARMv8-M Security Extensions. It is used as a
16565 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16566 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16567 or four masks, depending on whether it is being computed for a
16568 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16569 respectively. The tree for the type of the argument or a field within an
16570 argument is passed in ARG_TYPE, the current register this argument or field
16571 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16572 argument or field starts at is passed in STARTING_BIT and the last used bit
16573 is kept in LAST_USED_BIT which is also updated accordingly. */
16575 static unsigned HOST_WIDE_INT
16576 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
16577 uint32_t * padding_bits_to_clear
,
16578 unsigned starting_bit
, int * last_used_bit
)
16581 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
16583 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
16585 unsigned current_bit
= starting_bit
;
16587 long int offset
, size
;
16590 field
= TYPE_FIELDS (arg_type
);
16593 /* The offset within a structure is always an offset from
16594 the start of that structure. Make sure we take that into the
16595 calculation of the register based offset that we use here. */
16596 offset
= starting_bit
;
16597 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
16600 /* This is the actual size of the field, for bitfields this is the
16601 bitfield width and not the container size. */
16602 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16604 if (*last_used_bit
!= offset
)
16606 if (offset
< *last_used_bit
)
16608 /* This field's offset is before the 'last_used_bit', that
16609 means this field goes on the next register. So we need to
16610 pad the rest of the current register and increase the
16611 register number. */
16613 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
16616 padding_bits_to_clear
[*regno
] |= mask
;
16617 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16622 /* Otherwise we pad the bits between the last field's end and
16623 the start of the new field. */
16626 mask
= ((uint32_t)-1) >> (32 - offset
);
16627 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
16628 padding_bits_to_clear
[*regno
] |= mask
;
16630 current_bit
= offset
;
16633 /* Calculate further padding bits for inner structs/unions too. */
16634 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
16636 *last_used_bit
= current_bit
;
16637 not_to_clear_reg_mask
16638 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
16639 padding_bits_to_clear
, offset
,
16644 /* Update 'current_bit' with this field's size. If the
16645 'current_bit' lies in a subsequent register, update 'regno' and
16646 reset 'current_bit' to point to the current bit in that new
16648 current_bit
+= size
;
16649 while (current_bit
>= 32)
16652 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16655 *last_used_bit
= current_bit
;
16658 field
= TREE_CHAIN (field
);
16660 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16662 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
16664 tree field
, field_t
;
16665 int i
, regno_t
, field_size
;
16669 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
16670 = {-1, -1, -1, -1};
16672 /* To compute the padding bits in a union we only consider bits as
16673 padding bits if they are always either a padding bit or fall outside a
16674 fields size for all fields in the union. */
16675 field
= TYPE_FIELDS (arg_type
);
16678 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
16679 = {0U, 0U, 0U, 0U};
16680 int last_used_bit_t
= *last_used_bit
;
16682 field_t
= TREE_TYPE (field
);
16684 /* If the field's type is either a record or a union make sure to
16685 compute their padding bits too. */
16686 if (RECORD_OR_UNION_TYPE_P (field_t
))
16687 not_to_clear_reg_mask
16688 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
16689 &padding_bits_to_clear_t
[0],
16690 starting_bit
, &last_used_bit_t
);
16693 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16694 regno_t
= (field_size
/ 32) + *regno
;
16695 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
16698 for (i
= *regno
; i
< regno_t
; i
++)
16700 /* For all but the last register used by this field only keep the
16701 padding bits that were padding bits in this field. */
16702 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
16705 /* For the last register, keep all padding bits that were padding
16706 bits in this field and any padding bits that are still valid
16707 as padding bits but fall outside of this field's size. */
16708 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
16709 padding_bits_to_clear_res
[regno_t
]
16710 &= padding_bits_to_clear_t
[regno_t
] | mask
;
16712 /* Update the maximum size of the fields in terms of registers used
16713 ('max_reg') and the 'last_used_bit' in said register. */
16714 if (max_reg
< regno_t
)
16717 max_bit
= last_used_bit_t
;
16719 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
16720 max_bit
= last_used_bit_t
;
16722 field
= TREE_CHAIN (field
);
16725 /* Update the current padding_bits_to_clear using the intersection of the
16726 padding bits of all the fields. */
16727 for (i
=*regno
; i
< max_reg
; i
++)
16728 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
16730 /* Do not keep trailing padding bits, we do not know yet whether this
16731 is the end of the argument. */
16732 mask
= ((uint32_t) 1 << max_bit
) - 1;
16733 padding_bits_to_clear
[max_reg
]
16734 |= padding_bits_to_clear_res
[max_reg
] & mask
;
16737 *last_used_bit
= max_bit
;
16740 /* This function should only be used for structs and unions. */
16741 gcc_unreachable ();
16743 return not_to_clear_reg_mask
;
16746 /* In the context of ARMv8-M Security Extensions, this function is used for both
16747 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16748 registers are used when returning or passing arguments, which is then
16749 returned as a mask. It will also compute a mask to indicate padding/unused
16750 bits for each of these registers, and passes this through the
16751 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16752 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16753 the starting register used to pass this argument or return value is passed
16754 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16755 for struct and union types. */
16757 static unsigned HOST_WIDE_INT
16758 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
16759 uint32_t * padding_bits_to_clear
)
16762 int last_used_bit
= 0;
16763 unsigned HOST_WIDE_INT not_to_clear_mask
;
16765 if (RECORD_OR_UNION_TYPE_P (arg_type
))
16768 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
16769 padding_bits_to_clear
, 0,
16773 /* If the 'last_used_bit' is not zero, that means we are still using a
16774 part of the last 'regno'. In such cases we must clear the trailing
16775 bits. Otherwise we are not using regno and we should mark it as to
16777 if (last_used_bit
!= 0)
16778 padding_bits_to_clear
[regno
]
16779 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
16781 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
16785 not_to_clear_mask
= 0;
16786 /* We are not dealing with structs nor unions. So these arguments may be
16787 passed in floating point registers too. In some cases a BLKmode is
16788 used when returning or passing arguments in multiple VFP registers. */
16789 if (GET_MODE (arg_rtx
) == BLKmode
)
16794 /* This should really only occur when dealing with the hard-float
16796 gcc_assert (TARGET_HARD_FLOAT_ABI
);
16798 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
16800 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
16801 gcc_assert (REG_P (reg
));
16803 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
16805 /* If we are dealing with DF mode, make sure we don't
16806 clear either of the registers it addresses. */
16807 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
16810 unsigned HOST_WIDE_INT mask
;
16811 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
16812 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
16813 not_to_clear_mask
|= mask
;
16819 /* Otherwise we can rely on the MODE to determine how many registers
16820 are being used by this argument. */
16821 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
16822 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
16825 unsigned HOST_WIDE_INT
16826 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
16827 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
16828 not_to_clear_mask
|= mask
;
16833 return not_to_clear_mask
;
16836 /* Saves callee saved registers, clears callee saved registers and caller saved
16837 registers not used to pass arguments before a cmse_nonsecure_call. And
16838 restores the callee saved registers after. */
16841 cmse_nonsecure_call_clear_caller_saved (void)
16845 FOR_EACH_BB_FN (bb
, cfun
)
16849 FOR_BB_INSNS (bb
, insn
)
16851 uint64_t to_clear_mask
, float_mask
;
16853 rtx pat
, call
, unspec
, reg
, cleared_reg
, tmp
;
16854 unsigned int regno
, maxregno
;
16856 CUMULATIVE_ARGS args_so_far_v
;
16857 cumulative_args_t args_so_far
;
16858 tree arg_type
, fntype
;
16859 bool using_r4
, first_param
= true;
16860 function_args_iterator args_iter
;
16861 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
16862 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
[0];
16864 if (!NONDEBUG_INSN_P (insn
))
16867 if (!CALL_P (insn
))
16870 pat
= PATTERN (insn
);
16871 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
16872 call
= XVECEXP (pat
, 0, 0);
16874 /* Get the real call RTX if the insn sets a value, ie. returns. */
16875 if (GET_CODE (call
) == SET
)
16876 call
= SET_SRC (call
);
16878 /* Check if it is a cmse_nonsecure_call. */
16879 unspec
= XEXP (call
, 0);
16880 if (GET_CODE (unspec
) != UNSPEC
16881 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
16884 /* Determine the caller-saved registers we need to clear. */
16885 to_clear_mask
= (1LL << (NUM_ARG_REGS
)) - 1;
16886 maxregno
= NUM_ARG_REGS
- 1;
16887 /* Only look at the caller-saved floating point registers in case of
16888 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16889 lazy store and loads which clear both caller- and callee-saved
16891 if (TARGET_HARD_FLOAT_ABI
)
16893 float_mask
= (1LL << (D7_VFP_REGNUM
+ 1)) - 1;
16894 float_mask
&= ~((1LL << FIRST_VFP_REGNUM
) - 1);
16895 to_clear_mask
|= float_mask
;
16896 maxregno
= D7_VFP_REGNUM
;
16899 /* Make sure the register used to hold the function address is not
16901 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
16902 gcc_assert (MEM_P (address
));
16903 gcc_assert (REG_P (XEXP (address
, 0)));
16904 to_clear_mask
&= ~(1LL << REGNO (XEXP (address
, 0)));
16906 /* Set basic block of call insn so that df rescan is performed on
16907 insns inserted here. */
16908 set_block_for_insn (insn
, bb
);
16909 df_set_flags (DF_DEFER_INSN_RESCAN
);
16912 /* Make sure the scheduler doesn't schedule other insns beyond
16914 emit_insn (gen_blockage ());
16916 /* Walk through all arguments and clear registers appropriately.
16918 fntype
= TREE_TYPE (MEM_EXPR (address
));
16919 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
16921 args_so_far
= pack_cumulative_args (&args_so_far_v
);
16922 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
16925 machine_mode arg_mode
= TYPE_MODE (arg_type
);
16927 if (VOID_TYPE_P (arg_type
))
16931 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
,
16934 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
,
16936 gcc_assert (REG_P (arg_rtx
));
16938 &= ~compute_not_to_clear_mask (arg_type
, arg_rtx
,
16940 padding_bits_to_clear_ptr
);
16942 first_param
= false;
16945 /* Clear padding bits where needed. */
16946 cleared_reg
= XEXP (address
, 0);
16947 reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
16949 for (regno
= R0_REGNUM
; regno
< NUM_ARG_REGS
; regno
++)
16951 if (padding_bits_to_clear
[regno
] == 0)
16954 /* If this is a Thumb-1 target copy the address of the function
16955 we are calling from 'r4' into 'ip' such that we can use r4 to
16956 clear the unused bits in the arguments. */
16957 if (TARGET_THUMB1
&& !using_r4
)
16961 emit_move_insn (gen_rtx_REG (SImode
, IP_REGNUM
),
16965 tmp
= GEN_INT ((((~padding_bits_to_clear
[regno
]) << 16u) >> 16u));
16966 emit_move_insn (reg
, tmp
);
16967 /* Also fill the top half of the negated
16968 padding_bits_to_clear. */
16969 if (((~padding_bits_to_clear
[regno
]) >> 16) > 0)
16971 tmp
= GEN_INT ((~padding_bits_to_clear
[regno
]) >> 16);
16972 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg
,
16978 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, regno
),
16979 gen_rtx_REG (SImode
, regno
),
16984 emit_move_insn (cleared_reg
,
16985 gen_rtx_REG (SImode
, IP_REGNUM
));
16987 /* We use right shift and left shift to clear the LSB of the address
16988 we jump to instead of using bic, to avoid having to use an extra
16989 register on Thumb-1. */
16990 tmp
= gen_rtx_LSHIFTRT (SImode
, cleared_reg
, const1_rtx
);
16991 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
16992 tmp
= gen_rtx_ASHIFT (SImode
, cleared_reg
, const1_rtx
);
16993 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
16995 /* Clearing all registers that leak before doing a non-secure
16997 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
16999 if (!(to_clear_mask
& (1LL << regno
)))
17002 /* If regno is an even vfp register and its successor is also to
17003 be cleared, use vmov. */
17004 if (IS_VFP_REGNUM (regno
))
17006 if (TARGET_VFP_DOUBLE
17007 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
17008 && to_clear_mask
& (1LL << (regno
+ 1)))
17009 emit_move_insn (gen_rtx_REG (DFmode
, regno
++),
17010 CONST0_RTX (DFmode
));
17012 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
17013 CONST0_RTX (SFmode
));
17016 emit_move_insn (gen_rtx_REG (SImode
, regno
), cleared_reg
);
17019 seq
= get_insns ();
17021 emit_insn_before (seq
, insn
);
17027 /* Rewrite move insn into subtract of 0 if the condition codes will
17028 be useful in next conditional jump insn. */
17031 thumb1_reorg (void)
17035 FOR_EACH_BB_FN (bb
, cfun
)
17038 rtx cmp
, op0
, op1
, set
= NULL
;
17039 rtx_insn
*prev
, *insn
= BB_END (bb
);
17040 bool insn_clobbered
= false;
17042 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17043 insn
= PREV_INSN (insn
);
17045 /* Find the last cbranchsi4_insn in basic block BB. */
17046 if (insn
== BB_HEAD (bb
)
17047 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17050 /* Get the register with which we are comparing. */
17051 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17052 op0
= XEXP (cmp
, 0);
17053 op1
= XEXP (cmp
, 1);
17055 /* Check that comparison is against ZERO. */
17056 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17059 /* Find the first flag setting insn before INSN in basic block BB. */
17060 gcc_assert (insn
!= BB_HEAD (bb
));
17061 for (prev
= PREV_INSN (insn
);
17063 && prev
!= BB_HEAD (bb
)
17065 || DEBUG_INSN_P (prev
)
17066 || ((set
= single_set (prev
)) != NULL
17067 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17068 prev
= PREV_INSN (prev
))
17070 if (reg_set_p (op0
, prev
))
17071 insn_clobbered
= true;
17074 /* Skip if op0 is clobbered by insn other than prev. */
17075 if (insn_clobbered
)
17081 dest
= SET_DEST (set
);
17082 src
= SET_SRC (set
);
17083 if (!low_register_operand (dest
, SImode
)
17084 || !low_register_operand (src
, SImode
))
17087 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17088 in INSN. Both src and dest of the move insn are checked. */
17089 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17091 dest
= copy_rtx (dest
);
17092 src
= copy_rtx (src
);
17093 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17094 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17095 INSN_CODE (prev
) = -1;
17096 /* Set test register in INSN to dest. */
17097 XEXP (cmp
, 0) = copy_rtx (dest
);
17098 INSN_CODE (insn
) = -1;
17103 /* Convert instructions to their cc-clobbering variant if possible, since
17104 that allows us to use smaller encodings. */
17107 thumb2_reorg (void)
17112 INIT_REG_SET (&live
);
17114 /* We are freeing block_for_insn in the toplev to keep compatibility
17115 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17116 compute_bb_for_insn ();
17119 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17121 FOR_EACH_BB_FN (bb
, cfun
)
17123 if ((current_tune
->disparage_flag_setting_t16_encodings
17124 == tune_params::DISPARAGE_FLAGS_ALL
)
17125 && optimize_bb_for_speed_p (bb
))
17129 Convert_Action action
= SKIP
;
17130 Convert_Action action_for_partial_flag_setting
17131 = ((current_tune
->disparage_flag_setting_t16_encodings
17132 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17133 && optimize_bb_for_speed_p (bb
))
17136 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17137 df_simulate_initialize_backwards (bb
, &live
);
17138 FOR_BB_INSNS_REVERSE (bb
, insn
)
17140 if (NONJUMP_INSN_P (insn
)
17141 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17142 && GET_CODE (PATTERN (insn
)) == SET
)
17145 rtx pat
= PATTERN (insn
);
17146 rtx dst
= XEXP (pat
, 0);
17147 rtx src
= XEXP (pat
, 1);
17148 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17150 if (UNARY_P (src
) || BINARY_P (src
))
17151 op0
= XEXP (src
, 0);
17153 if (BINARY_P (src
))
17154 op1
= XEXP (src
, 1);
17156 if (low_register_operand (dst
, SImode
))
17158 switch (GET_CODE (src
))
17161 /* Adding two registers and storing the result
17162 in the first source is already a 16-bit
17164 if (rtx_equal_p (dst
, op0
)
17165 && register_operand (op1
, SImode
))
17168 if (low_register_operand (op0
, SImode
))
17170 /* ADDS <Rd>,<Rn>,<Rm> */
17171 if (low_register_operand (op1
, SImode
))
17173 /* ADDS <Rdn>,#<imm8> */
17174 /* SUBS <Rdn>,#<imm8> */
17175 else if (rtx_equal_p (dst
, op0
)
17176 && CONST_INT_P (op1
)
17177 && IN_RANGE (INTVAL (op1
), -255, 255))
17179 /* ADDS <Rd>,<Rn>,#<imm3> */
17180 /* SUBS <Rd>,<Rn>,#<imm3> */
17181 else if (CONST_INT_P (op1
)
17182 && IN_RANGE (INTVAL (op1
), -7, 7))
17185 /* ADCS <Rd>, <Rn> */
17186 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17187 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17188 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17190 && COMPARISON_P (op1
)
17191 && cc_register (XEXP (op1
, 0), VOIDmode
)
17192 && maybe_get_arm_condition_code (op1
) == ARM_CS
17193 && XEXP (op1
, 1) == const0_rtx
)
17198 /* RSBS <Rd>,<Rn>,#0
17199 Not handled here: see NEG below. */
17200 /* SUBS <Rd>,<Rn>,#<imm3>
17202 Not handled here: see PLUS above. */
17203 /* SUBS <Rd>,<Rn>,<Rm> */
17204 if (low_register_operand (op0
, SImode
)
17205 && low_register_operand (op1
, SImode
))
17210 /* MULS <Rdm>,<Rn>,<Rdm>
17211 As an exception to the rule, this is only used
17212 when optimizing for size since MULS is slow on all
17213 known implementations. We do not even want to use
17214 MULS in cold code, if optimizing for speed, so we
17215 test the global flag here. */
17216 if (!optimize_size
)
17218 /* Fall through. */
17222 /* ANDS <Rdn>,<Rm> */
17223 if (rtx_equal_p (dst
, op0
)
17224 && low_register_operand (op1
, SImode
))
17225 action
= action_for_partial_flag_setting
;
17226 else if (rtx_equal_p (dst
, op1
)
17227 && low_register_operand (op0
, SImode
))
17228 action
= action_for_partial_flag_setting
== SKIP
17229 ? SKIP
: SWAP_CONV
;
17235 /* ASRS <Rdn>,<Rm> */
17236 /* LSRS <Rdn>,<Rm> */
17237 /* LSLS <Rdn>,<Rm> */
17238 if (rtx_equal_p (dst
, op0
)
17239 && low_register_operand (op1
, SImode
))
17240 action
= action_for_partial_flag_setting
;
17241 /* ASRS <Rd>,<Rm>,#<imm5> */
17242 /* LSRS <Rd>,<Rm>,#<imm5> */
17243 /* LSLS <Rd>,<Rm>,#<imm5> */
17244 else if (low_register_operand (op0
, SImode
)
17245 && CONST_INT_P (op1
)
17246 && IN_RANGE (INTVAL (op1
), 0, 31))
17247 action
= action_for_partial_flag_setting
;
17251 /* RORS <Rdn>,<Rm> */
17252 if (rtx_equal_p (dst
, op0
)
17253 && low_register_operand (op1
, SImode
))
17254 action
= action_for_partial_flag_setting
;
17258 /* MVNS <Rd>,<Rm> */
17259 if (low_register_operand (op0
, SImode
))
17260 action
= action_for_partial_flag_setting
;
17264 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17265 if (low_register_operand (op0
, SImode
))
17270 /* MOVS <Rd>,#<imm8> */
17271 if (CONST_INT_P (src
)
17272 && IN_RANGE (INTVAL (src
), 0, 255))
17273 action
= action_for_partial_flag_setting
;
17277 /* MOVS and MOV<c> with registers have different
17278 encodings, so are not relevant here. */
17286 if (action
!= SKIP
)
17288 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17289 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17292 if (action
== SWAP_CONV
)
17294 src
= copy_rtx (src
);
17295 XEXP (src
, 0) = op1
;
17296 XEXP (src
, 1) = op0
;
17297 pat
= gen_rtx_SET (dst
, src
);
17298 vec
= gen_rtvec (2, pat
, clobber
);
17300 else /* action == CONV */
17301 vec
= gen_rtvec (2, pat
, clobber
);
17303 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17304 INSN_CODE (insn
) = -1;
17308 if (NONDEBUG_INSN_P (insn
))
17309 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17313 CLEAR_REG_SET (&live
);
17316 /* Gcc puts the pool in the wrong place for ARM, since we can only
17317 load addresses a limited distance around the pc. We do some
17318 special munging to move the constant pool values to the correct
17319 point in the code. */
17324 HOST_WIDE_INT address
= 0;
17328 cmse_nonsecure_call_clear_caller_saved ();
17331 else if (TARGET_THUMB2
)
17334 /* Ensure all insns that must be split have been split at this point.
17335 Otherwise, the pool placement code below may compute incorrect
17336 insn lengths. Note that when optimizing, all insns have already
17337 been split at this point. */
17339 split_all_insns_noflow ();
17341 minipool_fix_head
= minipool_fix_tail
= NULL
;
17343 /* The first insn must always be a note, or the code below won't
17344 scan it properly. */
17345 insn
= get_insns ();
17346 gcc_assert (NOTE_P (insn
));
17349 /* Scan all the insns and record the operands that will need fixing. */
17350 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17352 if (BARRIER_P (insn
))
17353 push_minipool_barrier (insn
, address
);
17354 else if (INSN_P (insn
))
17356 rtx_jump_table_data
*table
;
17358 note_invalid_constants (insn
, address
, true);
17359 address
+= get_attr_length (insn
);
17361 /* If the insn is a vector jump, add the size of the table
17362 and skip the table. */
17363 if (tablejump_p (insn
, NULL
, &table
))
17365 address
+= get_jump_table_size (table
);
17369 else if (LABEL_P (insn
))
17370 /* Add the worst-case padding due to alignment. We don't add
17371 the _current_ padding because the minipool insertions
17372 themselves might change it. */
17373 address
+= get_label_padding (insn
);
17376 fix
= minipool_fix_head
;
17378 /* Now scan the fixups and perform the required changes. */
17383 Mfix
* last_added_fix
;
17384 Mfix
* last_barrier
= NULL
;
17387 /* Skip any further barriers before the next fix. */
17388 while (fix
&& BARRIER_P (fix
->insn
))
17391 /* No more fixes. */
17395 last_added_fix
= NULL
;
17397 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17399 if (BARRIER_P (ftmp
->insn
))
17401 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17404 last_barrier
= ftmp
;
17406 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17409 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17412 /* If we found a barrier, drop back to that; any fixes that we
17413 could have reached but come after the barrier will now go in
17414 the next mini-pool. */
17415 if (last_barrier
!= NULL
)
17417 /* Reduce the refcount for those fixes that won't go into this
17419 for (fdel
= last_barrier
->next
;
17420 fdel
&& fdel
!= ftmp
;
17423 fdel
->minipool
->refcount
--;
17424 fdel
->minipool
= NULL
;
17427 ftmp
= last_barrier
;
17431 /* ftmp is first fix that we can't fit into this pool and
17432 there no natural barriers that we could use. Insert a
17433 new barrier in the code somewhere between the previous
17434 fix and this one, and arrange to jump around it. */
17435 HOST_WIDE_INT max_address
;
17437 /* The last item on the list of fixes must be a barrier, so
17438 we can never run off the end of the list of fixes without
17439 last_barrier being set. */
17442 max_address
= minipool_vector_head
->max_address
;
17443 /* Check that there isn't another fix that is in range that
17444 we couldn't fit into this pool because the pool was
17445 already too large: we need to put the pool before such an
17446 instruction. The pool itself may come just after the
17447 fix because create_fix_barrier also allows space for a
17448 jump instruction. */
17449 if (ftmp
->address
< max_address
)
17450 max_address
= ftmp
->address
+ 1;
17452 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17455 assign_minipool_offsets (last_barrier
);
17459 if (!BARRIER_P (ftmp
->insn
)
17460 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17467 /* Scan over the fixes we have identified for this pool, fixing them
17468 up and adding the constants to the pool itself. */
17469 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17470 this_fix
= this_fix
->next
)
17471 if (!BARRIER_P (this_fix
->insn
))
17474 = plus_constant (Pmode
,
17475 gen_rtx_LABEL_REF (VOIDmode
,
17476 minipool_vector_label
),
17477 this_fix
->minipool
->offset
);
17478 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17481 dump_minipool (last_barrier
->insn
);
17485 /* From now on we must synthesize any constants that we can't handle
17486 directly. This can happen if the RTL gets split during final
17487 instruction generation. */
17488 cfun
->machine
->after_arm_reorg
= 1;
17490 /* Free the minipool memory. */
17491 obstack_free (&minipool_obstack
, minipool_startobj
);
17494 /* Routines to output assembly language. */
17496 /* Return string representation of passed in real value. */
17497 static const char *
17498 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17500 if (!fp_consts_inited
)
17503 gcc_assert (real_equal (r
, &value_fp0
));
17507 /* OPERANDS[0] is the entire list of insns that constitute pop,
17508 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17509 is in the list, UPDATE is true iff the list contains explicit
17510 update of base register. */
17512 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17518 const char *conditional
;
17519 int num_saves
= XVECLEN (operands
[0], 0);
17520 unsigned int regno
;
17521 unsigned int regno_base
= REGNO (operands
[1]);
17522 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
17525 offset
+= update
? 1 : 0;
17526 offset
+= return_pc
? 1 : 0;
17528 /* Is the base register in the list? */
17529 for (i
= offset
; i
< num_saves
; i
++)
17531 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17532 /* If SP is in the list, then the base register must be SP. */
17533 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17534 /* If base register is in the list, there must be no explicit update. */
17535 if (regno
== regno_base
)
17536 gcc_assert (!update
);
17539 conditional
= reverse
? "%?%D0" : "%?%d0";
17540 /* Can't use POP if returning from an interrupt. */
17541 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17542 sprintf (pattern
, "pop%s\t{", conditional
);
17545 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17546 It's just a convention, their semantics are identical. */
17547 if (regno_base
== SP_REGNUM
)
17548 sprintf (pattern
, "ldmfd%s\t", conditional
);
17550 sprintf (pattern
, "ldmia%s\t", conditional
);
17552 sprintf (pattern
, "ldm%s\t", conditional
);
17554 strcat (pattern
, reg_names
[regno_base
]);
17556 strcat (pattern
, "!, {");
17558 strcat (pattern
, ", {");
17561 /* Output the first destination register. */
17563 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17565 /* Output the rest of the destination registers. */
17566 for (i
= offset
+ 1; i
< num_saves
; i
++)
17568 strcat (pattern
, ", ");
17570 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17573 strcat (pattern
, "}");
17575 if (interrupt_p
&& return_pc
)
17576 strcat (pattern
, "^");
17578 output_asm_insn (pattern
, &cond
);
17582 /* Output the assembly for a store multiple. */
17585 vfp_output_vstmd (rtx
* operands
)
17591 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17592 ? XEXP (operands
[0], 0)
17593 : XEXP (XEXP (operands
[0], 0), 0);
17594 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17597 strcpy (pattern
, "vpush%?.64\t{%P1");
17599 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17601 p
= strlen (pattern
);
17603 gcc_assert (REG_P (operands
[1]));
17605 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17606 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17608 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17610 strcpy (&pattern
[p
], "}");
17612 output_asm_insn (pattern
, operands
);
17617 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17618 number of bytes pushed. */
17621 vfp_emit_fstmd (int base_reg
, int count
)
17628 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17629 register pairs are stored by a store multiple insn. We avoid this
17630 by pushing an extra pair. */
17631 if (count
== 2 && !arm_arch6
)
17633 if (base_reg
== LAST_VFP_REGNUM
- 3)
17638 /* FSTMD may not store more than 16 doubleword registers at once. Split
17639 larger stores into multiple parts (up to a maximum of two, in
17644 /* NOTE: base_reg is an internal register number, so each D register
17646 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17647 saved
+= vfp_emit_fstmd (base_reg
, 16);
17651 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17652 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17654 reg
= gen_rtx_REG (DFmode
, base_reg
);
17657 XVECEXP (par
, 0, 0)
17658 = gen_rtx_SET (gen_frame_mem
17660 gen_rtx_PRE_MODIFY (Pmode
,
17663 (Pmode
, stack_pointer_rtx
,
17666 gen_rtx_UNSPEC (BLKmode
,
17667 gen_rtvec (1, reg
),
17668 UNSPEC_PUSH_MULT
));
17670 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17671 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17672 RTX_FRAME_RELATED_P (tmp
) = 1;
17673 XVECEXP (dwarf
, 0, 0) = tmp
;
17675 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17676 RTX_FRAME_RELATED_P (tmp
) = 1;
17677 XVECEXP (dwarf
, 0, 1) = tmp
;
17679 for (i
= 1; i
< count
; i
++)
17681 reg
= gen_rtx_REG (DFmode
, base_reg
);
17683 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17685 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17686 plus_constant (Pmode
,
17690 RTX_FRAME_RELATED_P (tmp
) = 1;
17691 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17694 par
= emit_insn (par
);
17695 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17696 RTX_FRAME_RELATED_P (par
) = 1;
17701 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17702 has the cmse_nonsecure_call attribute and returns false otherwise. */
17705 detect_cmse_nonsecure_call (tree addr
)
17710 tree fntype
= TREE_TYPE (addr
);
17711 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
17712 TYPE_ATTRIBUTES (fntype
)))
17718 /* Emit a call instruction with pattern PAT. ADDR is the address of
17719 the call target. */
17722 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17726 insn
= emit_call_insn (pat
);
17728 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17729 If the call might use such an entry, add a use of the PIC register
17730 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17731 if (TARGET_VXWORKS_RTP
17734 && GET_CODE (addr
) == SYMBOL_REF
17735 && (SYMBOL_REF_DECL (addr
)
17736 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17737 : !SYMBOL_REF_LOCAL_P (addr
)))
17739 require_pic_register ();
17740 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17743 if (TARGET_AAPCS_BASED
)
17745 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17746 linker. We need to add an IP clobber to allow setting
17747 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17748 is not needed since it's a fixed register. */
17749 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17750 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17754 /* Output a 'call' insn. */
17756 output_call (rtx
*operands
)
17758 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17760 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17761 if (REGNO (operands
[0]) == LR_REGNUM
)
17763 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17764 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17767 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17769 if (TARGET_INTERWORK
|| arm_arch4t
)
17770 output_asm_insn ("bx%?\t%0", operands
);
17772 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17777 /* Output a move from arm registers to arm registers of a long double
17778 OPERANDS[0] is the destination.
17779 OPERANDS[1] is the source. */
17781 output_mov_long_double_arm_from_arm (rtx
*operands
)
17783 /* We have to be careful here because the two might overlap. */
17784 int dest_start
= REGNO (operands
[0]);
17785 int src_start
= REGNO (operands
[1]);
17789 if (dest_start
< src_start
)
17791 for (i
= 0; i
< 3; i
++)
17793 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17794 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17795 output_asm_insn ("mov%?\t%0, %1", ops
);
17800 for (i
= 2; i
>= 0; i
--)
17802 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17803 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17804 output_asm_insn ("mov%?\t%0, %1", ops
);
17812 arm_emit_movpair (rtx dest
, rtx src
)
17814 /* If the src is an immediate, simplify it. */
17815 if (CONST_INT_P (src
))
17817 HOST_WIDE_INT val
= INTVAL (src
);
17818 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17819 if ((val
>> 16) & 0x0000ffff)
17821 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17823 GEN_INT ((val
>> 16) & 0x0000ffff));
17824 rtx_insn
*insn
= get_last_insn ();
17825 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17829 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17830 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17831 rtx_insn
*insn
= get_last_insn ();
17832 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17835 /* Output a move between double words. It must be REG<-MEM
17838 output_move_double (rtx
*operands
, bool emit
, int *count
)
17840 enum rtx_code code0
= GET_CODE (operands
[0]);
17841 enum rtx_code code1
= GET_CODE (operands
[1]);
17846 /* The only case when this might happen is when
17847 you are looking at the length of a DImode instruction
17848 that has an invalid constant in it. */
17849 if (code0
== REG
&& code1
!= MEM
)
17851 gcc_assert (!emit
);
17858 unsigned int reg0
= REGNO (operands
[0]);
17860 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17862 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17864 switch (GET_CODE (XEXP (operands
[1], 0)))
17871 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17872 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
17874 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
17879 gcc_assert (TARGET_LDRD
);
17881 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
17888 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
17890 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
17898 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
17900 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
17905 gcc_assert (TARGET_LDRD
);
17907 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
17912 /* Autoicrement addressing modes should never have overlapping
17913 base and destination registers, and overlapping index registers
17914 are already prohibited, so this doesn't need to worry about
17916 otherops
[0] = operands
[0];
17917 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17918 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17920 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17922 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17924 /* Registers overlap so split out the increment. */
17927 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17928 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
17935 /* Use a single insn if we can.
17936 FIXME: IWMMXT allows offsets larger than ldrd can
17937 handle, fix these up with a pair of ldr. */
17939 || !CONST_INT_P (otherops
[2])
17940 || (INTVAL (otherops
[2]) > -256
17941 && INTVAL (otherops
[2]) < 256))
17944 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
17950 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17951 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17961 /* Use a single insn if we can.
17962 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17963 fix these up with a pair of ldr. */
17965 || !CONST_INT_P (otherops
[2])
17966 || (INTVAL (otherops
[2]) > -256
17967 && INTVAL (otherops
[2]) < 256))
17970 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
17976 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17977 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
17987 /* We might be able to use ldrd %0, %1 here. However the range is
17988 different to ldr/adr, and it is broken on some ARMv7-M
17989 implementations. */
17990 /* Use the second register of the pair to avoid problematic
17992 otherops
[1] = operands
[1];
17994 output_asm_insn ("adr%?\t%0, %1", otherops
);
17995 operands
[1] = otherops
[0];
17999 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18001 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18008 /* ??? This needs checking for thumb2. */
18010 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18011 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18013 otherops
[0] = operands
[0];
18014 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18015 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18017 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18019 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18021 switch ((int) INTVAL (otherops
[2]))
18025 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18031 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18037 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18041 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18042 operands
[1] = otherops
[0];
18044 && (REG_P (otherops
[2])
18046 || (CONST_INT_P (otherops
[2])
18047 && INTVAL (otherops
[2]) > -256
18048 && INTVAL (otherops
[2]) < 256)))
18050 if (reg_overlap_mentioned_p (operands
[0],
18053 /* Swap base and index registers over to
18054 avoid a conflict. */
18055 std::swap (otherops
[1], otherops
[2]);
18057 /* If both registers conflict, it will usually
18058 have been fixed by a splitter. */
18059 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18060 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18064 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18065 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18072 otherops
[0] = operands
[0];
18074 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18079 if (CONST_INT_P (otherops
[2]))
18083 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18084 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18086 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18092 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18098 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18105 return "ldrd%?\t%0, [%1]";
18107 return "ldmia%?\t%1, %M0";
18111 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18112 /* Take care of overlapping base/data reg. */
18113 if (reg_mentioned_p (operands
[0], operands
[1]))
18117 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18118 output_asm_insn ("ldr%?\t%0, %1", operands
);
18128 output_asm_insn ("ldr%?\t%0, %1", operands
);
18129 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18139 /* Constraints should ensure this. */
18140 gcc_assert (code0
== MEM
&& code1
== REG
);
18141 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18142 || (TARGET_ARM
&& TARGET_LDRD
));
18144 switch (GET_CODE (XEXP (operands
[0], 0)))
18150 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18152 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18157 gcc_assert (TARGET_LDRD
);
18159 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18166 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18168 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18176 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18178 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18183 gcc_assert (TARGET_LDRD
);
18185 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18190 otherops
[0] = operands
[1];
18191 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18192 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18194 /* IWMMXT allows offsets larger than ldrd can handle,
18195 fix these up with a pair of ldr. */
18197 && CONST_INT_P (otherops
[2])
18198 && (INTVAL(otherops
[2]) <= -256
18199 || INTVAL(otherops
[2]) >= 256))
18201 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18205 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18206 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18215 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18216 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18222 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18225 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18230 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18235 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18236 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18238 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18242 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18249 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18256 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18261 && (REG_P (otherops
[2])
18263 || (CONST_INT_P (otherops
[2])
18264 && INTVAL (otherops
[2]) > -256
18265 && INTVAL (otherops
[2]) < 256)))
18267 otherops
[0] = operands
[1];
18268 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18270 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18276 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18277 otherops
[1] = operands
[1];
18280 output_asm_insn ("str%?\t%1, %0", operands
);
18281 output_asm_insn ("str%?\t%H1, %0", otherops
);
18291 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18292 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18295 output_move_quad (rtx
*operands
)
18297 if (REG_P (operands
[0]))
18299 /* Load, or reg->reg move. */
18301 if (MEM_P (operands
[1]))
18303 switch (GET_CODE (XEXP (operands
[1], 0)))
18306 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18311 output_asm_insn ("adr%?\t%0, %1", operands
);
18312 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18316 gcc_unreachable ();
18324 gcc_assert (REG_P (operands
[1]));
18326 dest
= REGNO (operands
[0]);
18327 src
= REGNO (operands
[1]);
18329 /* This seems pretty dumb, but hopefully GCC won't try to do it
18332 for (i
= 0; i
< 4; i
++)
18334 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18335 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18336 output_asm_insn ("mov%?\t%0, %1", ops
);
18339 for (i
= 3; i
>= 0; i
--)
18341 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18342 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18343 output_asm_insn ("mov%?\t%0, %1", ops
);
18349 gcc_assert (MEM_P (operands
[0]));
18350 gcc_assert (REG_P (operands
[1]));
18351 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18353 switch (GET_CODE (XEXP (operands
[0], 0)))
18356 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18360 gcc_unreachable ();
18367 /* Output a VFP load or store instruction. */
18370 output_move_vfp (rtx
*operands
)
18372 rtx reg
, mem
, addr
, ops
[2];
18373 int load
= REG_P (operands
[0]);
18374 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18375 int sp
= (!TARGET_VFP_FP16INST
18376 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18377 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18382 reg
= operands
[!load
];
18383 mem
= operands
[load
];
18385 mode
= GET_MODE (reg
);
18387 gcc_assert (REG_P (reg
));
18388 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18389 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18395 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18396 gcc_assert (MEM_P (mem
));
18398 addr
= XEXP (mem
, 0);
18400 switch (GET_CODE (addr
))
18403 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18404 ops
[0] = XEXP (addr
, 0);
18409 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18410 ops
[0] = XEXP (addr
, 0);
18415 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18421 sprintf (buff
, templ
,
18422 load
? "ld" : "st",
18423 dp
? "64" : sp
? "32" : "16",
18425 integer_p
? "\t%@ int" : "");
18426 output_asm_insn (buff
, ops
);
18431 /* Output a Neon double-word or quad-word load or store, or a load
18432 or store for larger structure modes.
18434 WARNING: The ordering of elements is weird in big-endian mode,
18435 because the EABI requires that vectors stored in memory appear
18436 as though they were stored by a VSTM, as required by the EABI.
18437 GCC RTL defines element ordering based on in-memory order.
18438 This can be different from the architectural ordering of elements
18439 within a NEON register. The intrinsics defined in arm_neon.h use the
18440 NEON register element ordering, not the GCC RTL element ordering.
18442 For example, the in-memory ordering of a big-endian a quadword
18443 vector with 16-bit elements when stored from register pair {d0,d1}
18444 will be (lowest address first, d0[N] is NEON register element N):
18446 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18448 When necessary, quadword registers (dN, dN+1) are moved to ARM
18449 registers from rN in the order:
18451 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18453 So that STM/LDM can be used on vectors in ARM registers, and the
18454 same memory layout will result as if VSTM/VLDM were used.
18456 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18457 possible, which allows use of appropriate alignment tags.
18458 Note that the choice of "64" is independent of the actual vector
18459 element size; this size simply ensures that the behavior is
18460 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18462 Due to limitations of those instructions, use of VST1.64/VLD1.64
18463 is not possible if:
18464 - the address contains PRE_DEC, or
18465 - the mode refers to more than 4 double-word registers
18467 In those cases, it would be possible to replace VSTM/VLDM by a
18468 sequence of instructions; this is not currently implemented since
18469 this is not certain to actually improve performance. */
18472 output_move_neon (rtx
*operands
)
18474 rtx reg
, mem
, addr
, ops
[2];
18475 int regno
, nregs
, load
= REG_P (operands
[0]);
18480 reg
= operands
[!load
];
18481 mem
= operands
[load
];
18483 mode
= GET_MODE (reg
);
18485 gcc_assert (REG_P (reg
));
18486 regno
= REGNO (reg
);
18487 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18488 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18489 || NEON_REGNO_OK_FOR_QUAD (regno
));
18490 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18491 || VALID_NEON_QREG_MODE (mode
)
18492 || VALID_NEON_STRUCT_MODE (mode
));
18493 gcc_assert (MEM_P (mem
));
18495 addr
= XEXP (mem
, 0);
18497 /* Strip off const from addresses like (const (plus (...))). */
18498 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18499 addr
= XEXP (addr
, 0);
18501 switch (GET_CODE (addr
))
18504 /* We have to use vldm / vstm for too-large modes. */
18507 templ
= "v%smia%%?\t%%0!, %%h1";
18508 ops
[0] = XEXP (addr
, 0);
18512 templ
= "v%s1.64\t%%h1, %%A0";
18519 /* We have to use vldm / vstm in this case, since there is no
18520 pre-decrement form of the vld1 / vst1 instructions. */
18521 templ
= "v%smdb%%?\t%%0!, %%h1";
18522 ops
[0] = XEXP (addr
, 0);
18527 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18528 gcc_unreachable ();
18531 /* We have to use vldm / vstm for too-large modes. */
18535 templ
= "v%smia%%?\t%%m0, %%h1";
18537 templ
= "v%s1.64\t%%h1, %%A0";
18543 /* Fall through. */
18549 for (i
= 0; i
< nregs
; i
++)
18551 /* We're only using DImode here because it's a convenient size. */
18552 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18553 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18554 if (reg_overlap_mentioned_p (ops
[0], mem
))
18556 gcc_assert (overlap
== -1);
18561 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18562 output_asm_insn (buff
, ops
);
18567 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18568 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18569 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18570 output_asm_insn (buff
, ops
);
18577 gcc_unreachable ();
18580 sprintf (buff
, templ
, load
? "ld" : "st");
18581 output_asm_insn (buff
, ops
);
18586 /* Compute and return the length of neon_mov<mode>, where <mode> is
18587 one of VSTRUCT modes: EI, OI, CI or XI. */
18589 arm_attr_length_move_neon (rtx_insn
*insn
)
18591 rtx reg
, mem
, addr
;
18595 extract_insn_cached (insn
);
18597 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18599 mode
= GET_MODE (recog_data
.operand
[0]);
18610 gcc_unreachable ();
18614 load
= REG_P (recog_data
.operand
[0]);
18615 reg
= recog_data
.operand
[!load
];
18616 mem
= recog_data
.operand
[load
];
18618 gcc_assert (MEM_P (mem
));
18620 mode
= GET_MODE (reg
);
18621 addr
= XEXP (mem
, 0);
18623 /* Strip off const from addresses like (const (plus (...))). */
18624 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18625 addr
= XEXP (addr
, 0);
18627 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18629 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18636 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18640 arm_address_offset_is_imm (rtx_insn
*insn
)
18644 extract_insn_cached (insn
);
18646 if (REG_P (recog_data
.operand
[0]))
18649 mem
= recog_data
.operand
[0];
18651 gcc_assert (MEM_P (mem
));
18653 addr
= XEXP (mem
, 0);
18656 || (GET_CODE (addr
) == PLUS
18657 && REG_P (XEXP (addr
, 0))
18658 && CONST_INT_P (XEXP (addr
, 1))))
18664 /* Output an ADD r, s, #n where n may be too big for one instruction.
18665 If adding zero to one register, output nothing. */
18667 output_add_immediate (rtx
*operands
)
18669 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18671 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18674 output_multi_immediate (operands
,
18675 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18678 output_multi_immediate (operands
,
18679 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18686 /* Output a multiple immediate operation.
18687 OPERANDS is the vector of operands referred to in the output patterns.
18688 INSTR1 is the output pattern to use for the first constant.
18689 INSTR2 is the output pattern to use for subsequent constants.
18690 IMMED_OP is the index of the constant slot in OPERANDS.
18691 N is the constant value. */
18692 static const char *
18693 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18694 int immed_op
, HOST_WIDE_INT n
)
18696 #if HOST_BITS_PER_WIDE_INT > 32
18702 /* Quick and easy output. */
18703 operands
[immed_op
] = const0_rtx
;
18704 output_asm_insn (instr1
, operands
);
18709 const char * instr
= instr1
;
18711 /* Note that n is never zero here (which would give no output). */
18712 for (i
= 0; i
< 32; i
+= 2)
18716 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18717 output_asm_insn (instr
, operands
);
18727 /* Return the name of a shifter operation. */
18728 static const char *
18729 arm_shift_nmem(enum rtx_code code
)
18734 return ARM_LSL_NAME
;
18750 /* Return the appropriate ARM instruction for the operation code.
18751 The returned result should not be overwritten. OP is the rtx of the
18752 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18755 arithmetic_instr (rtx op
, int shift_first_arg
)
18757 switch (GET_CODE (op
))
18763 return shift_first_arg
? "rsb" : "sub";
18778 return arm_shift_nmem(GET_CODE(op
));
18781 gcc_unreachable ();
18785 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18786 for the operation code. The returned result should not be overwritten.
18787 OP is the rtx code of the shift.
18788 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18790 static const char *
18791 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18794 enum rtx_code code
= GET_CODE (op
);
18799 if (!CONST_INT_P (XEXP (op
, 1)))
18801 output_operand_lossage ("invalid shift operand");
18806 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18814 mnem
= arm_shift_nmem(code
);
18815 if (CONST_INT_P (XEXP (op
, 1)))
18817 *amountp
= INTVAL (XEXP (op
, 1));
18819 else if (REG_P (XEXP (op
, 1)))
18826 output_operand_lossage ("invalid shift operand");
18832 /* We never have to worry about the amount being other than a
18833 power of 2, since this case can never be reloaded from a reg. */
18834 if (!CONST_INT_P (XEXP (op
, 1)))
18836 output_operand_lossage ("invalid shift operand");
18840 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18842 /* Amount must be a power of two. */
18843 if (*amountp
& (*amountp
- 1))
18845 output_operand_lossage ("invalid shift operand");
18849 *amountp
= exact_log2 (*amountp
);
18850 gcc_assert (IN_RANGE (*amountp
, 0, 31));
18851 return ARM_LSL_NAME
;
18854 output_operand_lossage ("invalid shift operand");
18858 /* This is not 100% correct, but follows from the desire to merge
18859 multiplication by a power of 2 with the recognizer for a
18860 shift. >=32 is not a valid shift for "lsl", so we must try and
18861 output a shift that produces the correct arithmetical result.
18862 Using lsr #32 is identical except for the fact that the carry bit
18863 is not set correctly if we set the flags; but we never use the
18864 carry bit from such an operation, so we can ignore that. */
18865 if (code
== ROTATERT
)
18866 /* Rotate is just modulo 32. */
18868 else if (*amountp
!= (*amountp
& 31))
18870 if (code
== ASHIFT
)
18875 /* Shifts of 0 are no-ops. */
18882 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18883 because /bin/as is horribly restrictive. The judgement about
18884 whether or not each character is 'printable' (and can be output as
18885 is) or not (and must be printed with an octal escape) must be made
18886 with reference to the *host* character set -- the situation is
18887 similar to that discussed in the comments above pp_c_char in
18888 c-pretty-print.c. */
18890 #define MAX_ASCII_LEN 51
18893 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18896 int len_so_far
= 0;
18898 fputs ("\t.ascii\t\"", stream
);
18900 for (i
= 0; i
< len
; i
++)
18904 if (len_so_far
>= MAX_ASCII_LEN
)
18906 fputs ("\"\n\t.ascii\t\"", stream
);
18912 if (c
== '\\' || c
== '\"')
18914 putc ('\\', stream
);
18922 fprintf (stream
, "\\%03o", c
);
18927 fputs ("\"\n", stream
);
18930 /* Whether a register is callee saved or not. This is necessary because high
18931 registers are marked as caller saved when optimizing for size on Thumb-1
18932 targets despite being callee saved in order to avoid using them. */
18933 #define callee_saved_reg_p(reg) \
18934 (!call_used_regs[reg] \
18935 || (TARGET_THUMB1 && optimize_size \
18936 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18938 /* Compute the register save mask for registers 0 through 12
18939 inclusive. This code is used by arm_compute_save_reg_mask. */
18941 static unsigned long
18942 arm_compute_save_reg0_reg12_mask (void)
18944 unsigned long func_type
= arm_current_func_type ();
18945 unsigned long save_reg_mask
= 0;
18948 if (IS_INTERRUPT (func_type
))
18950 unsigned int max_reg
;
18951 /* Interrupt functions must not corrupt any registers,
18952 even call clobbered ones. If this is a leaf function
18953 we can just examine the registers used by the RTL, but
18954 otherwise we have to assume that whatever function is
18955 called might clobber anything, and so we have to save
18956 all the call-clobbered registers as well. */
18957 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18958 /* FIQ handlers have registers r8 - r12 banked, so
18959 we only need to check r0 - r7, Normal ISRs only
18960 bank r14 and r15, so we must check up to r12.
18961 r13 is the stack pointer which is always preserved,
18962 so we do not need to consider it here. */
18967 for (reg
= 0; reg
<= max_reg
; reg
++)
18968 if (df_regs_ever_live_p (reg
)
18969 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18970 save_reg_mask
|= (1 << reg
);
18972 /* Also save the pic base register if necessary. */
18974 && !TARGET_SINGLE_PIC_BASE
18975 && arm_pic_register
!= INVALID_REGNUM
18976 && crtl
->uses_pic_offset_table
)
18977 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18979 else if (IS_VOLATILE(func_type
))
18981 /* For noreturn functions we historically omitted register saves
18982 altogether. However this really messes up debugging. As a
18983 compromise save just the frame pointers. Combined with the link
18984 register saved elsewhere this should be sufficient to get
18986 if (frame_pointer_needed
)
18987 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18988 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
18989 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18990 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
18991 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
18995 /* In the normal case we only need to save those registers
18996 which are call saved and which are used by this function. */
18997 for (reg
= 0; reg
<= 11; reg
++)
18998 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
18999 save_reg_mask
|= (1 << reg
);
19001 /* Handle the frame pointer as a special case. */
19002 if (frame_pointer_needed
)
19003 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19005 /* If we aren't loading the PIC register,
19006 don't stack it even though it may be live. */
19008 && !TARGET_SINGLE_PIC_BASE
19009 && arm_pic_register
!= INVALID_REGNUM
19010 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19011 || crtl
->uses_pic_offset_table
))
19012 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19014 /* The prologue will copy SP into R0, so save it. */
19015 if (IS_STACKALIGN (func_type
))
19016 save_reg_mask
|= 1;
19019 /* Save registers so the exception handler can modify them. */
19020 if (crtl
->calls_eh_return
)
19026 reg
= EH_RETURN_DATA_REGNO (i
);
19027 if (reg
== INVALID_REGNUM
)
19029 save_reg_mask
|= 1 << reg
;
19033 return save_reg_mask
;
19036 /* Return true if r3 is live at the start of the function. */
19039 arm_r3_live_at_start_p (void)
19041 /* Just look at cfg info, which is still close enough to correct at this
19042 point. This gives false positives for broken functions that might use
19043 uninitialized data that happens to be allocated in r3, but who cares? */
19044 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19047 /* Compute the number of bytes used to store the static chain register on the
19048 stack, above the stack frame. We need to know this accurately to get the
19049 alignment of the rest of the stack frame correct. */
19052 arm_compute_static_chain_stack_bytes (void)
19054 /* See the defining assertion in arm_expand_prologue. */
19055 if (IS_NESTED (arm_current_func_type ())
19056 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19057 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19058 && !df_regs_ever_live_p (LR_REGNUM
)))
19059 && arm_r3_live_at_start_p ()
19060 && crtl
->args
.pretend_args_size
== 0)
19066 /* Compute a bit mask of which registers need to be
19067 saved on the stack for the current function.
19068 This is used by arm_get_frame_offsets, which may add extra registers. */
19070 static unsigned long
19071 arm_compute_save_reg_mask (void)
19073 unsigned int save_reg_mask
= 0;
19074 unsigned long func_type
= arm_current_func_type ();
19077 if (IS_NAKED (func_type
))
19078 /* This should never really happen. */
19081 /* If we are creating a stack frame, then we must save the frame pointer,
19082 IP (which will hold the old stack pointer), LR and the PC. */
19083 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19085 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19088 | (1 << PC_REGNUM
);
19090 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19092 /* Decide if we need to save the link register.
19093 Interrupt routines have their own banked link register,
19094 so they never need to save it.
19095 Otherwise if we do not use the link register we do not need to save
19096 it. If we are pushing other registers onto the stack however, we
19097 can save an instruction in the epilogue by pushing the link register
19098 now and then popping it back into the PC. This incurs extra memory
19099 accesses though, so we only do it when optimizing for size, and only
19100 if we know that we will not need a fancy return sequence. */
19101 if (df_regs_ever_live_p (LR_REGNUM
)
19104 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19105 && !crtl
->tail_call_emit
19106 && !crtl
->calls_eh_return
))
19107 save_reg_mask
|= 1 << LR_REGNUM
;
19109 if (cfun
->machine
->lr_save_eliminated
)
19110 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19112 if (TARGET_REALLY_IWMMXT
19113 && ((bit_count (save_reg_mask
)
19114 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19115 arm_compute_static_chain_stack_bytes())
19118 /* The total number of registers that are going to be pushed
19119 onto the stack is odd. We need to ensure that the stack
19120 is 64-bit aligned before we start to save iWMMXt registers,
19121 and also before we start to create locals. (A local variable
19122 might be a double or long long which we will load/store using
19123 an iWMMXt instruction). Therefore we need to push another
19124 ARM register, so that the stack will be 64-bit aligned. We
19125 try to avoid using the arg registers (r0 -r3) as they might be
19126 used to pass values in a tail call. */
19127 for (reg
= 4; reg
<= 12; reg
++)
19128 if ((save_reg_mask
& (1 << reg
)) == 0)
19132 save_reg_mask
|= (1 << reg
);
19135 cfun
->machine
->sibcall_blocked
= 1;
19136 save_reg_mask
|= (1 << 3);
19140 /* We may need to push an additional register for use initializing the
19141 PIC base register. */
19142 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19143 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19145 reg
= thumb_find_work_register (1 << 4);
19146 if (!call_used_regs
[reg
])
19147 save_reg_mask
|= (1 << reg
);
19150 return save_reg_mask
;
19153 /* Compute a bit mask of which registers need to be
19154 saved on the stack for the current function. */
19155 static unsigned long
19156 thumb1_compute_save_reg_mask (void)
19158 unsigned long mask
;
19162 for (reg
= 0; reg
< 12; reg
++)
19163 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19166 /* Handle the frame pointer as a special case. */
19167 if (frame_pointer_needed
)
19168 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19171 && !TARGET_SINGLE_PIC_BASE
19172 && arm_pic_register
!= INVALID_REGNUM
19173 && crtl
->uses_pic_offset_table
)
19174 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19176 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19177 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19178 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19180 /* LR will also be pushed if any lo regs are pushed. */
19181 if (mask
& 0xff || thumb_force_lr_save ())
19182 mask
|= (1 << LR_REGNUM
);
19184 /* Make sure we have a low work register if we need one.
19185 We will need one if we are going to push a high register,
19186 but we are not currently intending to push a low register. */
19187 if ((mask
& 0xff) == 0
19188 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19190 /* Use thumb_find_work_register to choose which register
19191 we will use. If the register is live then we will
19192 have to push it. Use LAST_LO_REGNUM as our fallback
19193 choice for the register to select. */
19194 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19195 /* Make sure the register returned by thumb_find_work_register is
19196 not part of the return value. */
19197 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19198 reg
= LAST_LO_REGNUM
;
19200 if (callee_saved_reg_p (reg
))
19204 /* The 504 below is 8 bytes less than 512 because there are two possible
19205 alignment words. We can't tell here if they will be present or not so we
19206 have to play it safe and assume that they are. */
19207 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19208 ROUND_UP_WORD (get_frame_size ()) +
19209 crtl
->outgoing_args_size
) >= 504)
19211 /* This is the same as the code in thumb1_expand_prologue() which
19212 determines which register to use for stack decrement. */
19213 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19214 if (mask
& (1 << reg
))
19217 if (reg
> LAST_LO_REGNUM
)
19219 /* Make sure we have a register available for stack decrement. */
19220 mask
|= 1 << LAST_LO_REGNUM
;
19228 /* Return the number of bytes required to save VFP registers. */
19230 arm_get_vfp_saved_size (void)
19232 unsigned int regno
;
19237 /* Space for saved VFP registers. */
19238 if (TARGET_HARD_FLOAT
)
19241 for (regno
= FIRST_VFP_REGNUM
;
19242 regno
< LAST_VFP_REGNUM
;
19245 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19246 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19250 /* Workaround ARM10 VFPr1 bug. */
19251 if (count
== 2 && !arm_arch6
)
19253 saved
+= count
* 8;
19262 if (count
== 2 && !arm_arch6
)
19264 saved
+= count
* 8;
19271 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19272 everything bar the final return instruction. If simple_return is true,
19273 then do not output epilogue, because it has already been emitted in RTL. */
19275 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19276 bool simple_return
)
19278 char conditional
[10];
19281 unsigned long live_regs_mask
;
19282 unsigned long func_type
;
19283 arm_stack_offsets
*offsets
;
19285 func_type
= arm_current_func_type ();
19287 if (IS_NAKED (func_type
))
19290 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19292 /* If this function was declared non-returning, and we have
19293 found a tail call, then we have to trust that the called
19294 function won't return. */
19299 /* Otherwise, trap an attempted return by aborting. */
19301 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19303 assemble_external_libcall (ops
[1]);
19304 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19310 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19312 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19314 cfun
->machine
->return_used_this_function
= 1;
19316 offsets
= arm_get_frame_offsets ();
19317 live_regs_mask
= offsets
->saved_regs_mask
;
19319 if (!simple_return
&& live_regs_mask
)
19321 const char * return_reg
;
19323 /* If we do not have any special requirements for function exit
19324 (e.g. interworking) then we can load the return address
19325 directly into the PC. Otherwise we must load it into LR. */
19327 && !IS_CMSE_ENTRY (func_type
)
19328 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19329 return_reg
= reg_names
[PC_REGNUM
];
19331 return_reg
= reg_names
[LR_REGNUM
];
19333 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19335 /* There are three possible reasons for the IP register
19336 being saved. 1) a stack frame was created, in which case
19337 IP contains the old stack pointer, or 2) an ISR routine
19338 corrupted it, or 3) it was saved to align the stack on
19339 iWMMXt. In case 1, restore IP into SP, otherwise just
19341 if (frame_pointer_needed
)
19343 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19344 live_regs_mask
|= (1 << SP_REGNUM
);
19347 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19350 /* On some ARM architectures it is faster to use LDR rather than
19351 LDM to load a single register. On other architectures, the
19352 cost is the same. In 26 bit mode, or for exception handlers,
19353 we have to use LDM to load the PC so that the CPSR is also
19355 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19356 if (live_regs_mask
== (1U << reg
))
19359 if (reg
<= LAST_ARM_REGNUM
19360 && (reg
!= LR_REGNUM
19362 || ! IS_INTERRUPT (func_type
)))
19364 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19365 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19372 /* Generate the load multiple instruction to restore the
19373 registers. Note we can get here, even if
19374 frame_pointer_needed is true, but only if sp already
19375 points to the base of the saved core registers. */
19376 if (live_regs_mask
& (1 << SP_REGNUM
))
19378 unsigned HOST_WIDE_INT stack_adjust
;
19380 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19381 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19383 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19384 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19387 /* If we can't use ldmib (SA110 bug),
19388 then try to pop r3 instead. */
19390 live_regs_mask
|= 1 << 3;
19392 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19395 /* For interrupt returns we have to use an LDM rather than
19396 a POP so that we can use the exception return variant. */
19397 else if (IS_INTERRUPT (func_type
))
19398 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19400 sprintf (instr
, "pop%s\t{", conditional
);
19402 p
= instr
+ strlen (instr
);
19404 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19405 if (live_regs_mask
& (1 << reg
))
19407 int l
= strlen (reg_names
[reg
]);
19413 memcpy (p
, ", ", 2);
19417 memcpy (p
, "%|", 2);
19418 memcpy (p
+ 2, reg_names
[reg
], l
);
19422 if (live_regs_mask
& (1 << LR_REGNUM
))
19424 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19425 /* If returning from an interrupt, restore the CPSR. */
19426 if (IS_INTERRUPT (func_type
))
19433 output_asm_insn (instr
, & operand
);
19435 /* See if we need to generate an extra instruction to
19436 perform the actual function return. */
19438 && func_type
!= ARM_FT_INTERWORKED
19439 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19441 /* The return has already been handled
19442 by loading the LR into the PC. */
19449 switch ((int) ARM_FUNC_TYPE (func_type
))
19453 /* ??? This is wrong for unified assembly syntax. */
19454 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19457 case ARM_FT_INTERWORKED
:
19458 gcc_assert (arm_arch5
|| arm_arch4t
);
19459 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19462 case ARM_FT_EXCEPTION
:
19463 /* ??? This is wrong for unified assembly syntax. */
19464 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19468 if (IS_CMSE_ENTRY (func_type
))
19470 /* Check if we have to clear the 'GE bits' which is only used if
19471 parallel add and subtraction instructions are available. */
19472 if (TARGET_INT_SIMD
)
19473 snprintf (instr
, sizeof (instr
),
19474 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
19476 snprintf (instr
, sizeof (instr
),
19477 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
19479 output_asm_insn (instr
, & operand
);
19480 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
19482 /* Clear the cumulative exception-status bits (0-4,7) and the
19483 condition code bits (28-31) of the FPSCR. We need to
19484 remember to clear the first scratch register used (IP) and
19485 save and restore the second (r4). */
19486 snprintf (instr
, sizeof (instr
), "push\t{%%|r4}");
19487 output_asm_insn (instr
, & operand
);
19488 snprintf (instr
, sizeof (instr
), "vmrs\t%%|ip, fpscr");
19489 output_asm_insn (instr
, & operand
);
19490 snprintf (instr
, sizeof (instr
), "movw\t%%|r4, #65376");
19491 output_asm_insn (instr
, & operand
);
19492 snprintf (instr
, sizeof (instr
), "movt\t%%|r4, #4095");
19493 output_asm_insn (instr
, & operand
);
19494 snprintf (instr
, sizeof (instr
), "and\t%%|ip, %%|r4");
19495 output_asm_insn (instr
, & operand
);
19496 snprintf (instr
, sizeof (instr
), "vmsr\tfpscr, %%|ip");
19497 output_asm_insn (instr
, & operand
);
19498 snprintf (instr
, sizeof (instr
), "pop\t{%%|r4}");
19499 output_asm_insn (instr
, & operand
);
19500 snprintf (instr
, sizeof (instr
), "mov\t%%|ip, %%|lr");
19501 output_asm_insn (instr
, & operand
);
19503 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
19505 /* Use bx if it's available. */
19506 else if (arm_arch5
|| arm_arch4t
)
19507 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19509 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19513 output_asm_insn (instr
, & operand
);
19519 /* Output in FILE asm statements needed to declare the NAME of the function
19520 defined by its DECL node. */
19523 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
19525 size_t cmse_name_len
;
19526 char *cmse_name
= 0;
19527 char cmse_prefix
[] = "__acle_se_";
19529 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19530 extra function label for each function with the 'cmse_nonsecure_entry'
19531 attribute. This extra function label should be prepended with
19532 '__acle_se_', telling the linker that it needs to create secure gateway
19533 veneers for this function. */
19534 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
19535 DECL_ATTRIBUTES (decl
)))
19537 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
19538 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
19539 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
19540 targetm
.asm_out
.globalize_label (file
, cmse_name
);
19542 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
19543 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
19546 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
19547 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
19548 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
19549 ASM_OUTPUT_LABEL (file
, name
);
19552 ASM_OUTPUT_LABEL (file
, cmse_name
);
19554 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
19557 /* Write the function name into the code section, directly preceding
19558 the function prologue.
19560 Code will be output similar to this:
19562 .ascii "arm_poke_function_name", 0
19565 .word 0xff000000 + (t1 - t0)
19566 arm_poke_function_name
19568 stmfd sp!, {fp, ip, lr, pc}
19571 When performing a stack backtrace, code can inspect the value
19572 of 'pc' stored at 'fp' + 0. If the trace function then looks
19573 at location pc - 12 and the top 8 bits are set, then we know
19574 that there is a function name embedded immediately preceding this
19575 location and has length ((pc[-3]) & 0xff000000).
19577 We assume that pc is declared as a pointer to an unsigned long.
19579 It is of no benefit to output the function name if we are assembling
19580 a leaf function. These function types will not contain a stack
19581 backtrace structure, therefore it is not possible to determine the
19584 arm_poke_function_name (FILE *stream
, const char *name
)
19586 unsigned long alignlength
;
19587 unsigned long length
;
19590 length
= strlen (name
) + 1;
19591 alignlength
= ROUND_UP_WORD (length
);
19593 ASM_OUTPUT_ASCII (stream
, name
, length
);
19594 ASM_OUTPUT_ALIGN (stream
, 2);
19595 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19596 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19599 /* Place some comments into the assembler stream
19600 describing the current function. */
19602 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19604 unsigned long func_type
;
19606 /* Sanity check. */
19607 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19609 func_type
= arm_current_func_type ();
19611 switch ((int) ARM_FUNC_TYPE (func_type
))
19614 case ARM_FT_NORMAL
:
19616 case ARM_FT_INTERWORKED
:
19617 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19620 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19623 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19625 case ARM_FT_EXCEPTION
:
19626 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19630 if (IS_NAKED (func_type
))
19631 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19633 if (IS_VOLATILE (func_type
))
19634 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19636 if (IS_NESTED (func_type
))
19637 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19638 if (IS_STACKALIGN (func_type
))
19639 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19640 if (IS_CMSE_ENTRY (func_type
))
19641 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
19643 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19645 crtl
->args
.pretend_args_size
, frame_size
);
19647 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19648 frame_pointer_needed
,
19649 cfun
->machine
->uses_anonymous_args
);
19651 if (cfun
->machine
->lr_save_eliminated
)
19652 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19654 if (crtl
->calls_eh_return
)
19655 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19660 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19661 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19663 arm_stack_offsets
*offsets
;
19669 /* Emit any call-via-reg trampolines that are needed for v4t support
19670 of call_reg and call_value_reg type insns. */
19671 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19673 rtx label
= cfun
->machine
->call_via
[regno
];
19677 switch_to_section (function_section (current_function_decl
));
19678 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19679 CODE_LABEL_NUMBER (label
));
19680 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19684 /* ??? Probably not safe to set this here, since it assumes that a
19685 function will be emitted as assembly immediately after we generate
19686 RTL for it. This does not happen for inline functions. */
19687 cfun
->machine
->return_used_this_function
= 0;
19689 else /* TARGET_32BIT */
19691 /* We need to take into account any stack-frame rounding. */
19692 offsets
= arm_get_frame_offsets ();
19694 gcc_assert (!use_return_insn (FALSE
, NULL
)
19695 || (cfun
->machine
->return_used_this_function
!= 0)
19696 || offsets
->saved_regs
== offsets
->outgoing_args
19697 || frame_pointer_needed
);
19701 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19702 STR and STRD. If an even number of registers are being pushed, one
19703 or more STRD patterns are created for each register pair. If an
19704 odd number of registers are pushed, emit an initial STR followed by
19705 as many STRD instructions as are needed. This works best when the
19706 stack is initially 64-bit aligned (the normal case), since it
19707 ensures that each STRD is also 64-bit aligned. */
19709 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19714 rtx par
= NULL_RTX
;
19715 rtx dwarf
= NULL_RTX
;
19719 num_regs
= bit_count (saved_regs_mask
);
19721 /* Must be at least one register to save, and can't save SP or PC. */
19722 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19723 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19724 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19726 /* Create sequence for DWARF info. All the frame-related data for
19727 debugging is held in this wrapper. */
19728 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19730 /* Describe the stack adjustment. */
19731 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19732 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19733 RTX_FRAME_RELATED_P (tmp
) = 1;
19734 XVECEXP (dwarf
, 0, 0) = tmp
;
19736 /* Find the first register. */
19737 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19742 /* If there's an odd number of registers to push. Start off by
19743 pushing a single register. This ensures that subsequent strd
19744 operations are dword aligned (assuming that SP was originally
19745 64-bit aligned). */
19746 if ((num_regs
& 1) != 0)
19748 rtx reg
, mem
, insn
;
19750 reg
= gen_rtx_REG (SImode
, regno
);
19752 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19753 stack_pointer_rtx
));
19755 mem
= gen_frame_mem (Pmode
,
19757 (Pmode
, stack_pointer_rtx
,
19758 plus_constant (Pmode
, stack_pointer_rtx
,
19761 tmp
= gen_rtx_SET (mem
, reg
);
19762 RTX_FRAME_RELATED_P (tmp
) = 1;
19763 insn
= emit_insn (tmp
);
19764 RTX_FRAME_RELATED_P (insn
) = 1;
19765 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19766 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19767 RTX_FRAME_RELATED_P (tmp
) = 1;
19770 XVECEXP (dwarf
, 0, i
) = tmp
;
19774 while (i
< num_regs
)
19775 if (saved_regs_mask
& (1 << regno
))
19777 rtx reg1
, reg2
, mem1
, mem2
;
19778 rtx tmp0
, tmp1
, tmp2
;
19781 /* Find the register to pair with this one. */
19782 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19786 reg1
= gen_rtx_REG (SImode
, regno
);
19787 reg2
= gen_rtx_REG (SImode
, regno2
);
19794 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19797 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19799 -4 * (num_regs
- 1)));
19800 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19801 plus_constant (Pmode
, stack_pointer_rtx
,
19803 tmp1
= gen_rtx_SET (mem1
, reg1
);
19804 tmp2
= gen_rtx_SET (mem2
, reg2
);
19805 RTX_FRAME_RELATED_P (tmp0
) = 1;
19806 RTX_FRAME_RELATED_P (tmp1
) = 1;
19807 RTX_FRAME_RELATED_P (tmp2
) = 1;
19808 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19809 XVECEXP (par
, 0, 0) = tmp0
;
19810 XVECEXP (par
, 0, 1) = tmp1
;
19811 XVECEXP (par
, 0, 2) = tmp2
;
19812 insn
= emit_insn (par
);
19813 RTX_FRAME_RELATED_P (insn
) = 1;
19814 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19818 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19821 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19824 tmp1
= gen_rtx_SET (mem1
, reg1
);
19825 tmp2
= gen_rtx_SET (mem2
, reg2
);
19826 RTX_FRAME_RELATED_P (tmp1
) = 1;
19827 RTX_FRAME_RELATED_P (tmp2
) = 1;
19828 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19829 XVECEXP (par
, 0, 0) = tmp1
;
19830 XVECEXP (par
, 0, 1) = tmp2
;
19834 /* Create unwind information. This is an approximation. */
19835 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
19836 plus_constant (Pmode
,
19840 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
19841 plus_constant (Pmode
,
19846 RTX_FRAME_RELATED_P (tmp1
) = 1;
19847 RTX_FRAME_RELATED_P (tmp2
) = 1;
19848 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19849 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19851 regno
= regno2
+ 1;
19859 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19860 whenever possible, otherwise it emits single-word stores. The first store
19861 also allocates stack space for all saved registers, using writeback with
19862 post-addressing mode. All other stores use offset addressing. If no STRD
19863 can be emitted, this function emits a sequence of single-word stores,
19864 and not an STM as before, because single-word stores provide more freedom
19865 scheduling and can be turned into an STM by peephole optimizations. */
19867 arm_emit_strd_push (unsigned long saved_regs_mask
)
19870 int i
, j
, dwarf_index
= 0;
19872 rtx dwarf
= NULL_RTX
;
19873 rtx insn
= NULL_RTX
;
19876 /* TODO: A more efficient code can be emitted by changing the
19877 layout, e.g., first push all pairs that can use STRD to keep the
19878 stack aligned, and then push all other registers. */
19879 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19880 if (saved_regs_mask
& (1 << i
))
19883 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19884 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19885 gcc_assert (num_regs
> 0);
19887 /* Create sequence for DWARF info. */
19888 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19890 /* For dwarf info, we generate explicit stack update. */
19891 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19892 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19893 RTX_FRAME_RELATED_P (tmp
) = 1;
19894 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19896 /* Save registers. */
19897 offset
= - 4 * num_regs
;
19899 while (j
<= LAST_ARM_REGNUM
)
19900 if (saved_regs_mask
& (1 << j
))
19903 && (saved_regs_mask
& (1 << (j
+ 1))))
19905 /* Current register and previous register form register pair for
19906 which STRD can be generated. */
19909 /* Allocate stack space for all saved registers. */
19910 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19911 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19912 mem
= gen_frame_mem (DImode
, tmp
);
19915 else if (offset
> 0)
19916 mem
= gen_frame_mem (DImode
,
19917 plus_constant (Pmode
,
19921 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19923 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
19924 RTX_FRAME_RELATED_P (tmp
) = 1;
19925 tmp
= emit_insn (tmp
);
19927 /* Record the first store insn. */
19928 if (dwarf_index
== 1)
19931 /* Generate dwarf info. */
19932 mem
= gen_frame_mem (SImode
,
19933 plus_constant (Pmode
,
19936 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19937 RTX_FRAME_RELATED_P (tmp
) = 1;
19938 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19940 mem
= gen_frame_mem (SImode
,
19941 plus_constant (Pmode
,
19944 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
19945 RTX_FRAME_RELATED_P (tmp
) = 1;
19946 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19953 /* Emit a single word store. */
19956 /* Allocate stack space for all saved registers. */
19957 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19958 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19959 mem
= gen_frame_mem (SImode
, tmp
);
19962 else if (offset
> 0)
19963 mem
= gen_frame_mem (SImode
,
19964 plus_constant (Pmode
,
19968 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19970 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19971 RTX_FRAME_RELATED_P (tmp
) = 1;
19972 tmp
= emit_insn (tmp
);
19974 /* Record the first store insn. */
19975 if (dwarf_index
== 1)
19978 /* Generate dwarf info. */
19979 mem
= gen_frame_mem (SImode
,
19980 plus_constant(Pmode
,
19983 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19984 RTX_FRAME_RELATED_P (tmp
) = 1;
19985 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19994 /* Attach dwarf info to the first insn we generate. */
19995 gcc_assert (insn
!= NULL_RTX
);
19996 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19997 RTX_FRAME_RELATED_P (insn
) = 1;
20000 /* Generate and emit an insn that we will recognize as a push_multi.
20001 Unfortunately, since this insn does not reflect very well the actual
20002 semantics of the operation, we need to annotate the insn for the benefit
20003 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20004 MASK for registers that should be annotated for DWARF2 frame unwind
20007 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20010 int num_dwarf_regs
= 0;
20014 int dwarf_par_index
;
20017 /* We don't record the PC in the dwarf frame information. */
20018 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20020 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20022 if (mask
& (1 << i
))
20024 if (dwarf_regs_mask
& (1 << i
))
20028 gcc_assert (num_regs
&& num_regs
<= 16);
20029 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20031 /* For the body of the insn we are going to generate an UNSPEC in
20032 parallel with several USEs. This allows the insn to be recognized
20033 by the push_multi pattern in the arm.md file.
20035 The body of the insn looks something like this:
20038 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20039 (const_int:SI <num>)))
20040 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20046 For the frame note however, we try to be more explicit and actually
20047 show each register being stored into the stack frame, plus a (single)
20048 decrement of the stack pointer. We do it this way in order to be
20049 friendly to the stack unwinding code, which only wants to see a single
20050 stack decrement per instruction. The RTL we generate for the note looks
20051 something like this:
20054 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20055 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20056 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20057 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20061 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20062 instead we'd have a parallel expression detailing all
20063 the stores to the various memory addresses so that debug
20064 information is more up-to-date. Remember however while writing
20065 this to take care of the constraints with the push instruction.
20067 Note also that this has to be taken care of for the VFP registers.
20069 For more see PR43399. */
20071 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20072 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20073 dwarf_par_index
= 1;
20075 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20077 if (mask
& (1 << i
))
20079 reg
= gen_rtx_REG (SImode
, i
);
20081 XVECEXP (par
, 0, 0)
20082 = gen_rtx_SET (gen_frame_mem
20084 gen_rtx_PRE_MODIFY (Pmode
,
20087 (Pmode
, stack_pointer_rtx
,
20090 gen_rtx_UNSPEC (BLKmode
,
20091 gen_rtvec (1, reg
),
20092 UNSPEC_PUSH_MULT
));
20094 if (dwarf_regs_mask
& (1 << i
))
20096 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20098 RTX_FRAME_RELATED_P (tmp
) = 1;
20099 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20106 for (j
= 1, i
++; j
< num_regs
; i
++)
20108 if (mask
& (1 << i
))
20110 reg
= gen_rtx_REG (SImode
, i
);
20112 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20114 if (dwarf_regs_mask
& (1 << i
))
20117 = gen_rtx_SET (gen_frame_mem
20119 plus_constant (Pmode
, stack_pointer_rtx
,
20122 RTX_FRAME_RELATED_P (tmp
) = 1;
20123 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20130 par
= emit_insn (par
);
20132 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20133 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20134 RTX_FRAME_RELATED_P (tmp
) = 1;
20135 XVECEXP (dwarf
, 0, 0) = tmp
;
20137 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20142 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20143 SIZE is the offset to be adjusted.
20144 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20146 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20150 RTX_FRAME_RELATED_P (insn
) = 1;
20151 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20152 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20155 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20156 SAVED_REGS_MASK shows which registers need to be restored.
20158 Unfortunately, since this insn does not reflect very well the actual
20159 semantics of the operation, we need to annotate the insn for the benefit
20160 of DWARF2 frame unwind information. */
20162 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20167 rtx dwarf
= NULL_RTX
;
20169 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20173 offset_adj
= return_in_pc
? 1 : 0;
20174 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20175 if (saved_regs_mask
& (1 << i
))
20178 gcc_assert (num_regs
&& num_regs
<= 16);
20180 /* If SP is in reglist, then we don't emit SP update insn. */
20181 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20183 /* The parallel needs to hold num_regs SETs
20184 and one SET for the stack update. */
20185 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20188 XVECEXP (par
, 0, 0) = ret_rtx
;
20192 /* Increment the stack pointer, based on there being
20193 num_regs 4-byte registers to restore. */
20194 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20195 plus_constant (Pmode
,
20198 RTX_FRAME_RELATED_P (tmp
) = 1;
20199 XVECEXP (par
, 0, offset_adj
) = tmp
;
20202 /* Now restore every reg, which may include PC. */
20203 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20204 if (saved_regs_mask
& (1 << i
))
20206 reg
= gen_rtx_REG (SImode
, i
);
20207 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20209 /* Emit single load with writeback. */
20210 tmp
= gen_frame_mem (SImode
,
20211 gen_rtx_POST_INC (Pmode
,
20212 stack_pointer_rtx
));
20213 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20214 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20218 tmp
= gen_rtx_SET (reg
,
20221 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20222 RTX_FRAME_RELATED_P (tmp
) = 1;
20223 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20225 /* We need to maintain a sequence for DWARF info too. As dwarf info
20226 should not have PC, skip PC. */
20227 if (i
!= PC_REGNUM
)
20228 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20234 par
= emit_jump_insn (par
);
20236 par
= emit_insn (par
);
20238 REG_NOTES (par
) = dwarf
;
20240 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20241 stack_pointer_rtx
, stack_pointer_rtx
);
20244 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20245 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20247 Unfortunately, since this insn does not reflect very well the actual
20248 semantics of the operation, we need to annotate the insn for the benefit
20249 of DWARF2 frame unwind information. */
20251 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20255 rtx dwarf
= NULL_RTX
;
20258 gcc_assert (num_regs
&& num_regs
<= 32);
20260 /* Workaround ARM10 VFPr1 bug. */
20261 if (num_regs
== 2 && !arm_arch6
)
20263 if (first_reg
== 15)
20269 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20270 there could be up to 32 D-registers to restore.
20271 If there are more than 16 D-registers, make two recursive calls,
20272 each of which emits one pop_multi instruction. */
20275 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20276 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20280 /* The parallel needs to hold num_regs SETs
20281 and one SET for the stack update. */
20282 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20284 /* Increment the stack pointer, based on there being
20285 num_regs 8-byte registers to restore. */
20286 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20287 RTX_FRAME_RELATED_P (tmp
) = 1;
20288 XVECEXP (par
, 0, 0) = tmp
;
20290 /* Now show every reg that will be restored, using a SET for each. */
20291 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20293 reg
= gen_rtx_REG (DFmode
, i
);
20295 tmp
= gen_rtx_SET (reg
,
20298 plus_constant (Pmode
, base_reg
, 8 * j
)));
20299 RTX_FRAME_RELATED_P (tmp
) = 1;
20300 XVECEXP (par
, 0, j
+ 1) = tmp
;
20302 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20307 par
= emit_insn (par
);
20308 REG_NOTES (par
) = dwarf
;
20310 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20311 if (REGNO (base_reg
) == IP_REGNUM
)
20313 RTX_FRAME_RELATED_P (par
) = 1;
20314 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20317 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20318 base_reg
, base_reg
);
20321 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20322 number of registers are being popped, multiple LDRD patterns are created for
20323 all register pairs. If odd number of registers are popped, last register is
20324 loaded by using LDR pattern. */
20326 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20330 rtx par
= NULL_RTX
;
20331 rtx dwarf
= NULL_RTX
;
20332 rtx tmp
, reg
, tmp1
;
20333 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20335 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20336 if (saved_regs_mask
& (1 << i
))
20339 gcc_assert (num_regs
&& num_regs
<= 16);
20341 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20342 to be popped. So, if num_regs is even, now it will become odd,
20343 and we can generate pop with PC. If num_regs is odd, it will be
20344 even now, and ldr with return can be generated for PC. */
20348 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20350 /* Var j iterates over all the registers to gather all the registers in
20351 saved_regs_mask. Var i gives index of saved registers in stack frame.
20352 A PARALLEL RTX of register-pair is created here, so that pattern for
20353 LDRD can be matched. As PC is always last register to be popped, and
20354 we have already decremented num_regs if PC, we don't have to worry
20355 about PC in this loop. */
20356 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20357 if (saved_regs_mask
& (1 << j
))
20359 /* Create RTX for memory load. */
20360 reg
= gen_rtx_REG (SImode
, j
);
20361 tmp
= gen_rtx_SET (reg
,
20362 gen_frame_mem (SImode
,
20363 plus_constant (Pmode
,
20364 stack_pointer_rtx
, 4 * i
)));
20365 RTX_FRAME_RELATED_P (tmp
) = 1;
20369 /* When saved-register index (i) is even, the RTX to be emitted is
20370 yet to be created. Hence create it first. The LDRD pattern we
20371 are generating is :
20372 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20373 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20374 where target registers need not be consecutive. */
20375 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20379 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20380 added as 0th element and if i is odd, reg_i is added as 1st element
20381 of LDRD pattern shown above. */
20382 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20383 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20387 /* When saved-register index (i) is odd, RTXs for both the registers
20388 to be loaded are generated in above given LDRD pattern, and the
20389 pattern can be emitted now. */
20390 par
= emit_insn (par
);
20391 REG_NOTES (par
) = dwarf
;
20392 RTX_FRAME_RELATED_P (par
) = 1;
20398 /* If the number of registers pushed is odd AND return_in_pc is false OR
20399 number of registers are even AND return_in_pc is true, last register is
20400 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20401 then LDR with post increment. */
20403 /* Increment the stack pointer, based on there being
20404 num_regs 4-byte registers to restore. */
20405 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20406 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20407 RTX_FRAME_RELATED_P (tmp
) = 1;
20408 tmp
= emit_insn (tmp
);
20411 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20412 stack_pointer_rtx
, stack_pointer_rtx
);
20417 if (((num_regs
% 2) == 1 && !return_in_pc
)
20418 || ((num_regs
% 2) == 0 && return_in_pc
))
20420 /* Scan for the single register to be popped. Skip until the saved
20421 register is found. */
20422 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20424 /* Gen LDR with post increment here. */
20425 tmp1
= gen_rtx_MEM (SImode
,
20426 gen_rtx_POST_INC (SImode
,
20427 stack_pointer_rtx
));
20428 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20430 reg
= gen_rtx_REG (SImode
, j
);
20431 tmp
= gen_rtx_SET (reg
, tmp1
);
20432 RTX_FRAME_RELATED_P (tmp
) = 1;
20433 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20437 /* If return_in_pc, j must be PC_REGNUM. */
20438 gcc_assert (j
== PC_REGNUM
);
20439 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20440 XVECEXP (par
, 0, 0) = ret_rtx
;
20441 XVECEXP (par
, 0, 1) = tmp
;
20442 par
= emit_jump_insn (par
);
20446 par
= emit_insn (tmp
);
20447 REG_NOTES (par
) = dwarf
;
20448 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20449 stack_pointer_rtx
, stack_pointer_rtx
);
20453 else if ((num_regs
% 2) == 1 && return_in_pc
)
20455 /* There are 2 registers to be popped. So, generate the pattern
20456 pop_multiple_with_stack_update_and_return to pop in PC. */
20457 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20463 /* LDRD in ARM mode needs consecutive registers as operands. This function
20464 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20465 offset addressing and then generates one separate stack update. This provides
20466 more scheduling freedom, compared to writeback on every load. However,
20467 if the function returns using load into PC directly
20468 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20469 before the last load. TODO: Add a peephole optimization to recognize
20470 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20471 peephole optimization to merge the load at stack-offset zero
20472 with the stack update instruction using load with writeback
20473 in post-index addressing mode. */
/* NOTE(review): this chunk appears to be a damaged extraction -- several
   source lines are missing (original-line numbering is discontinuous) and
   statements are split mid-token.  Verify against upstream
   gcc/config/arm/arm.c before relying on the text below.  */
20475 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20479 rtx par
= NULL_RTX
;
20480 rtx dwarf
= NULL_RTX
;
20483 /* Restore saved registers. */
/* SP must never appear in the pop mask: it is updated explicitly below.  */
20484 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20486 while (j
<= LAST_ARM_REGNUM
)
20487 if (saved_regs_mask
& (1 << j
))
20490 && (saved_regs_mask
& (1 << (j
+ 1)))
20491 && (j
+ 1) != PC_REGNUM
)
20493 /* Current register and next register form register pair for which
20494 LDRD can be generated. PC is always the last register popped, and
20495 we handle it separately. */
20497 mem
= gen_frame_mem (DImode
,
20498 plus_constant (Pmode
,
20502 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20504 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20505 tmp
= emit_insn (tmp
);
20506 RTX_FRAME_RELATED_P (tmp
) = 1;
20508 /* Generate dwarf info. */
/* One REG_CFA_RESTORE note per SImode half of the DImode load.  */
20510 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20511 gen_rtx_REG (SImode
, j
),
20513 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20514 gen_rtx_REG (SImode
, j
+ 1),
20517 REG_NOTES (tmp
) = dwarf
;
20522 else if (j
!= PC_REGNUM
)
20524 /* Emit a single word load. */
20526 mem
= gen_frame_mem (SImode
,
20527 plus_constant (Pmode
,
20531 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20533 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20534 tmp
= emit_insn (tmp
);
20535 RTX_FRAME_RELATED_P (tmp
) = 1;
20537 /* Generate dwarf info. */
20538 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20539 gen_rtx_REG (SImode
, j
),
20545 else /* j == PC_REGNUM */
20551 /* Update the stack. */
/* The stack is adjusted *before* the final PC load so that the
   return-via-pop below only has to post-increment by one word.  */
20554 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20555 plus_constant (Pmode
,
20558 tmp
= emit_insn (tmp
);
20559 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20560 stack_pointer_rtx
, stack_pointer_rtx
);
20564 if (saved_regs_mask
& (1 << PC_REGNUM
))
20566 /* Only PC is to be popped. */
20567 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20568 XVECEXP (par
, 0, 0) = ret_rtx
;
20569 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20570 gen_frame_mem (SImode
,
20571 gen_rtx_POST_INC (SImode
,
20572 stack_pointer_rtx
)));
20573 RTX_FRAME_RELATED_P (tmp
) = 1;
20574 XVECEXP (par
, 0, 1) = tmp
;
/* Loading PC is a jump; it must be emitted as a jump insn.  */
20575 par
= emit_jump_insn (par
);
20577 /* Generate dwarf info. */
20578 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20579 gen_rtx_REG (SImode
, PC_REGNUM
),
20581 REG_NOTES (par
) = dwarf
;
20582 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20583 stack_pointer_rtx
, stack_pointer_rtx
);
20587 /* Calculate the size of the return value that is passed in registers. */
20589 arm_size_return_regs (void)
20593 if (crtl
->return_rtx
!= 0)
20594 mode
= GET_MODE (crtl
->return_rtx
);
20596 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20598 return GET_MODE_SIZE (mode
);
20601 /* Return true if the current function needs to save/restore LR. */
20603 thumb_force_lr_save (void)
20605 return !cfun
->machine
->lr_save_eliminated
20607 || thumb_far_jump_used_p ()
20608 || df_regs_ever_live_p (LR_REGNUM
));
20611 /* We do not know if r3 will be available because
20612 we do have an indirect tailcall happening in this
20613 particular case. */
20615 is_indirect_tailcall_p (rtx call
)
20617 rtx pat
= PATTERN (call
);
20619 /* Indirect tail call. */
20620 pat
= XVECEXP (pat
, 0, 0);
20621 if (GET_CODE (pat
) == SET
)
20622 pat
= SET_SRC (pat
);
20624 pat
= XEXP (XEXP (pat
, 0), 0);
20625 return REG_P (pat
);
20628 /* Return true if r3 is used by any of the tail call insns in the
20629 current function. */
20631 any_sibcall_could_use_r3 (void)
20636 if (!crtl
->tail_call_emit
)
20638 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20639 if (e
->flags
& EDGE_SIBCALL
)
20641 rtx_insn
*call
= BB_END (e
->src
);
20642 if (!CALL_P (call
))
20643 call
= prev_nonnote_nondebug_insn (call
);
20644 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20645 if (find_regno_fusage (call
, USE
, 3)
20646 || is_indirect_tailcall_p (call
))
20653 /* Compute the distance from register FROM to register TO.
20654 These can be the arg pointer (26), the soft frame pointer (25),
20655 the stack pointer (13) or the hard frame pointer (11).
20656 In thumb mode r7 is used as the soft frame pointer, if needed.
20657 Typical stack layout looks like this:
20659 old stack pointer -> | |
20662 | | saved arguments for
20663 | | vararg functions
20666 hard FP & arg pointer -> | | \
20674 soft frame pointer -> | | /
20679 locals base pointer -> | | /
20684 current stack pointer -> | | /
20687 For a given function some or all of these stack components
20688 may not be needed, giving rise to the possibility of
20689 eliminating some of the registers.
20691 The values returned by this function must reflect the behavior
20692 of arm_expand_prologue() and arm_compute_save_reg_mask().
20694 The sign of the number returned reflects the direction of stack
20695 growth, so the values are positive for all eliminations except
20696 from the soft frame pointer to the hard frame pointer.
20698 SFP may point just inside the local variables block to ensure correct
20702 /* Calculate stack offsets. These are used to calculate register elimination
20703 offsets and in prologue/epilogue code. Also calculates which registers
20704 should be saved. */
/* NOTE(review): damaged extraction -- declarations, braces and several
   statements are missing here (original-line numbering has gaps).  Compare
   with upstream gcc/config/arm/arm.c before editing.  */
20706 static arm_stack_offsets
*
20707 arm_get_frame_offsets (void)
20709 struct arm_stack_offsets
*offsets
;
20710 unsigned long func_type
;
20713 HOST_WIDE_INT frame_size
;
/* Offsets are cached per-function in cfun->machine.  */
20716 offsets
= &cfun
->machine
->stack_offsets
;
20718 if (reload_completed
)
20721 /* Initially this is the size of the local variables. It will translated
20722 into an offset once we have determined the size of preceding data. */
20723 frame_size
= ROUND_UP_WORD (get_frame_size ());
20725 /* Space for variadic functions. */
20726 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20728 /* In Thumb mode this is incorrect, but never used. */
20730 = (offsets
->saved_args
20731 + arm_compute_static_chain_stack_bytes ()
20732 + (frame_pointer_needed
? 4 : 0));
20736 unsigned int regno
;
20738 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20739 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20740 saved
= core_saved
;
20742 /* We know that SP will be doubleword aligned on entry, and we must
20743 preserve that condition at any subroutine call. We also require the
20744 soft frame pointer to be doubleword aligned. */
20746 if (TARGET_REALLY_IWMMXT
)
20748 /* Check for the call-saved iWMMXt registers. */
20749 for (regno
= FIRST_IWMMXT_REGNUM
;
20750 regno
<= LAST_IWMMXT_REGNUM
;
20752 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20756 func_type
= arm_current_func_type ();
20757 /* Space for saved VFP registers. */
20758 if (! IS_VOLATILE (func_type
)
20759 && TARGET_HARD_FLOAT
)
20760 saved
+= arm_get_vfp_saved_size ();
20762 else /* TARGET_THUMB1 */
20764 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20765 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20766 saved
= core_saved
;
20767 if (TARGET_BACKTRACE
)
20771 /* Saved registers include the stack frame. */
20772 offsets
->saved_regs
20773 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20774 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20776 /* A leaf function does not need any stack alignment if it has nothing
20778 if (crtl
->is_leaf
&& frame_size
== 0
20779 /* However if it calls alloca(), we have a dynamically allocated
20780 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20781 && ! cfun
->calls_alloca
)
20783 offsets
->outgoing_args
= offsets
->soft_frame
;
20784 offsets
->locals_base
= offsets
->soft_frame
;
20788 /* Ensure SFP has the correct alignment. */
20789 if (ARM_DOUBLEWORD_ALIGN
20790 && (offsets
->soft_frame
& 7))
20792 offsets
->soft_frame
+= 4;
20793 /* Try to align stack by pushing an extra reg. Don't bother doing this
20794 when there is a stack frame as the alignment will be rolled into
20795 the normal stack adjustment. */
20796 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20800 /* Register r3 is caller-saved. Normally it does not need to be
20801 saved on entry by the prologue. However if we choose to save
20802 it for padding then we may confuse the compiler into thinking
20803 a prologue sequence is required when in fact it is not. This
20804 will occur when shrink-wrapping if r3 is used as a scratch
20805 register and there are no other callee-saved writes.
20807 This situation can be avoided when other callee-saved registers
20808 are available and r3 is not mandatory if we choose a callee-saved
20809 register for padding. */
20810 bool prefer_callee_reg_p
= false;
20812 /* If it is safe to use r3, then do so. This sometimes
20813 generates better code on Thumb-2 by avoiding the need to
20814 use 32-bit push/pop instructions. */
20815 if (! any_sibcall_could_use_r3 ()
20816 && arm_size_return_regs () <= 12
20817 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20819 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20822 if (!TARGET_THUMB2
)
20823 prefer_callee_reg_p
= true;
20826 || prefer_callee_reg_p
)
20828 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20830 /* Avoid fixed registers; they may be changed at
20831 arbitrary times so it's unsafe to restore them
20832 during the epilogue. */
20834 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
/* An extra register was chosen: account for its 4 bytes.  */
20844 offsets
->saved_regs
+= 4;
20845 offsets
->saved_regs_mask
|= (1 << reg
);
20850 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20851 offsets
->outgoing_args
= (offsets
->locals_base
20852 + crtl
->outgoing_args_size
);
20854 if (ARM_DOUBLEWORD_ALIGN
)
20856 /* Ensure SP remains doubleword aligned. */
20857 if (offsets
->outgoing_args
& 7)
20858 offsets
->outgoing_args
+= 4;
20859 gcc_assert (!(offsets
->outgoing_args
& 7));
20866 /* Calculate the relative offsets for the different stack pointers. Positive
20867 offsets are in the direction of stack growth. */
/* NOTE(review): damaged extraction -- the switch statements and several
   case bodies are incomplete here.  Verify against upstream arm.c.  */
20870 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20872 arm_stack_offsets
*offsets
;
20874 offsets
= arm_get_frame_offsets ();
20876 /* OK, now we have enough information to compute the distances.
20877 There must be an entry in these switch tables for each pair
20878 of registers in ELIMINABLE_REGS, even if some of the entries
20879 seem to be redundant or useless. */
20882 case ARG_POINTER_REGNUM
:
20885 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20888 case FRAME_POINTER_REGNUM
:
20889 /* This is the reverse of the soft frame pointer
20890 to hard frame pointer elimination below. */
20891 return offsets
->soft_frame
- offsets
->saved_args
;
20893 case ARM_HARD_FRAME_POINTER_REGNUM
:
20894 /* This is only non-zero in the case where the static chain register
20895 is stored above the frame. */
20896 return offsets
->frame
- offsets
->saved_args
- 4;
20898 case STACK_POINTER_REGNUM
:
20899 /* If nothing has been pushed on the stack at all
20900 then this will return -4. This *is* correct! */
20901 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20904 gcc_unreachable ();
20906 gcc_unreachable ();
20908 case FRAME_POINTER_REGNUM
:
20911 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20914 case ARM_HARD_FRAME_POINTER_REGNUM
:
20915 /* The hard frame pointer points to the top entry in the
20916 stack frame. The soft frame pointer to the bottom entry
20917 in the stack frame. If there is no stack frame at all,
20918 then they are identical. */
20920 return offsets
->frame
- offsets
->soft_frame
;
20922 case STACK_POINTER_REGNUM
:
20923 return offsets
->outgoing_args
- offsets
->soft_frame
;
20926 gcc_unreachable ();
20928 gcc_unreachable ();
20931 /* You cannot eliminate from the stack pointer.
20932 In theory you could eliminate from the hard frame
20933 pointer to the stack pointer, but this will never
20934 happen, since if a stack frame is not needed the
20935 hard frame pointer will never be used. */
20936 gcc_unreachable ();
20940 /* Given FROM and TO register numbers, say whether this elimination is
20941 allowed. Frame pointer elimination is automatically handled.
20943 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20944 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20945 pointer, we must eliminate FRAME_POINTER_REGNUM into
20946 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20947 ARG_POINTER_REGNUM. */
20950 arm_can_eliminate (const int from
, const int to
)
20952 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20953 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20954 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20955 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20959 /* Emit RTL to save coprocessor registers on function entry. Returns the
20960 number of bytes pushed. */
/* NOTE(review): damaged extraction -- declarations, braces and the final
   return are missing from this chunk.  Verify against upstream arm.c.  */
20963 arm_save_coproc_regs(void)
20965 int saved_size
= 0;
20967 unsigned start_reg
;
/* Save live call-saved iWMMXt registers one at a time, pre-decrementing
   the stack pointer for each.  */
20970 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20971 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20973 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20974 insn
= gen_rtx_MEM (V2SImode
, insn
);
20975 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20976 RTX_FRAME_RELATED_P (insn
) = 1;
20980 if (TARGET_HARD_FLOAT
)
/* Scan VFP register pairs, emitting one FSTMD per contiguous run of
   live call-saved pairs.  */
20982 start_reg
= FIRST_VFP_REGNUM
;
20984 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20986 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20987 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20989 if (start_reg
!= reg
)
20990 saved_size
+= vfp_emit_fstmd (start_reg
,
20991 (reg
- start_reg
) / 2);
20992 start_reg
= reg
+ 2;
/* Flush the final run, if any.  */
20995 if (start_reg
!= reg
)
20996 saved_size
+= vfp_emit_fstmd (start_reg
,
20997 (reg
- start_reg
) / 2);
21003 /* Set the Thumb frame pointer from the stack pointer. */
/* NOTE(review): damaged extraction -- the small/large `amount` branch
   structure is missing here.  Verify against upstream arm.c.  */
21006 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21008 HOST_WIDE_INT amount
;
/* The frame pointer sits `amount' bytes above the outgoing-args area.  */
21011 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21013 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21014 stack_pointer_rtx
, GEN_INT (amount
)));
/* Large offsets cannot be encoded as an immediate add: materialize the
   constant in the FP register first, then add SP.  */
21017 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21018 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21019 expects the first two operands to be the same. */
21022 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21024 hard_frame_pointer_rtx
));
21028 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21029 hard_frame_pointer_rtx
,
21030 stack_pointer_rtx
));
/* Describe the net effect (FP = SP + amount) for the unwinder, since
   the multi-insn sequence above is not directly interpretable.  */
21032 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21033 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21034 RTX_FRAME_RELATED_P (dwarf
) = 1;
21035 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21038 RTX_FRAME_RELATED_P (insn
) = 1;
21041 struct scratch_reg
{
21046 /* Return a short-lived scratch register for use as a 2nd scratch register on
21047 function entry after the registers are saved in the prologue. This register
21048 must be released by means of release_scratch_register_on_entry. IP is not
21049 considered since it is always used as the 1st scratch register if available.
21051 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21052 mask of live registers. */
/* NOTE(review): damaged extraction -- register-selection branches and the
   `sr->saved' spill decision are partially missing.  Verify against
   upstream arm.c.  */
21055 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21056 unsigned long live_regs
)
/* Prefer LR if it was pushed in the prologue and is not the 1st scratch.  */
21062 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
/* Otherwise look for any other pushed callee-saved low register.  */
21068 for (i
= 4; i
< 11; i
++)
21069 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21077 /* If IP is used as the 1st scratch register for a nested function,
21078 then either r3 wasn't available or is used to preserve IP. */
21079 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21081 regno
= (regno1
== 3 ? 2 : 3);
21083 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21088 sr
->reg
= gen_rtx_REG (SImode
, regno
);
/* The chosen register is live on entry: spill it with a pre-decrement
   store and tell the unwinder SP moved down by one word.  */
21091 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21092 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21093 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21094 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21095 RTX_FRAME_RELATED_P (insn
) = 1;
21096 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21100 /* Release a scratch register obtained from the preceding function. */
21103 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21107 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21108 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21109 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21110 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21111 RTX_FRAME_RELATED_P (insn
) = 1;
21112 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21116 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21118 #if PROBE_INTERVAL > 4096
21119 #error Cannot use indexed addressing mode for stack probing
21122 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21123 inclusive. These are offsets from the current stack pointer. REGNO1
21124 is the index number of the 1st scratch register and LIVE_REGS is the
21125 mask of live registers. */
/* NOTE(review): damaged extraction -- braces and some statements are
   missing in this chunk.  Verify against upstream arm.c.  */
21128 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21129 unsigned int regno1
, unsigned long live_regs
)
21131 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21133 /* See if we have a constant small number of probes to generate. If so,
21134 that's the easy case. */
21135 if (size
<= PROBE_INTERVAL
)
21137 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21138 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21139 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21142 /* The run-time loop is made up of 10 insns in the generic case while the
21143 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21144 else if (size
<= 5 * PROBE_INTERVAL
)
21146 HOST_WIDE_INT i
, rem
;
21148 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21149 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21150 emit_stack_probe (reg1
);
21152 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21153 it exceeds SIZE. If only two probes are needed, this will not
21154 generate any code. Then probe at FIRST + SIZE. */
21155 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21157 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21158 emit_stack_probe (reg1
);
/* The final probe offset may exceed the addressing-mode range (4095
   for ARM, 255 for some Thumb-2 forms): split it if necessary.  */
21161 rem
= size
- (i
- PROBE_INTERVAL
);
21162 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21164 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21165 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21168 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21171 /* Otherwise, do the same as above, but in a loop. Note that we must be
21172 extra careful with variables wrapping around because we might be at
21173 the very top (or the very bottom) of the address space and we have
21174 to be able to handle this case properly; in particular, we use an
21175 equality test for the loop condition. */
21178 HOST_WIDE_INT rounded_size
;
21179 struct scratch_reg sr
;
21181 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21183 emit_move_insn (reg1
, GEN_INT (first
));
21186 /* Step 1: round SIZE to the previous multiple of the interval. */
21188 rounded_size
= size
& -PROBE_INTERVAL
;
21189 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21192 /* Step 2: compute initial and final value of the loop counter. */
21194 /* TEST_ADDR = SP + FIRST. */
21195 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21197 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21198 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21201 /* Step 3: the loop
21205 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21208 while (TEST_ADDR != LAST_ADDR)
21210 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21211 until it is equal to ROUNDED_SIZE. */
21213 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21216 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21217 that SIZE is equal to ROUNDED_SIZE. */
21219 if (size
!= rounded_size
)
21221 HOST_WIDE_INT rem
= size
- rounded_size
;
21223 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21225 emit_set_insn (sr
.reg
,
21226 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21227 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21228 PROBE_INTERVAL
- rem
));
21231 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21234 release_scratch_register_on_entry (&sr
);
21237 /* Make sure nothing is scheduled before we are done. */
21238 emit_insn (gen_blockage ());
21241 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21242 absolute addresses. */
/* NOTE(review): damaged extraction -- the operand setup (xops[0]) and the
   trailing return of the empty template string are missing.  Verify
   against upstream arm.c.  */
21245 output_probe_stack_range (rtx reg1
, rtx reg2
)
/* Each expansion gets a unique local label for the probe loop.  */
21247 static int labelno
= 0;
21251 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21254 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21256 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21258 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21259 output_asm_insn ("sub\t%0, %0, %1", xops
);
21261 /* Probe at TEST_ADDR. */
21262 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21264 /* Test if TEST_ADDR == LAST_ADDR. */
21266 output_asm_insn ("cmp\t%0, %1", xops
);
/* Loop back while more intervals remain.  */
21269 fputs ("\tbne\t", asm_out_file
);
21270 assemble_name_raw (asm_out_file
, loop_lab
);
21271 fputc ('\n', asm_out_file
);
21276 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21279 arm_expand_prologue (void)
21284 unsigned long live_regs_mask
;
21285 unsigned long func_type
;
21287 int saved_pretend_args
= 0;
21288 int saved_regs
= 0;
21289 unsigned HOST_WIDE_INT args_to_push
;
21290 HOST_WIDE_INT size
;
21291 arm_stack_offsets
*offsets
;
21294 func_type
= arm_current_func_type ();
21296 /* Naked functions don't have prologues. */
21297 if (IS_NAKED (func_type
))
21299 if (flag_stack_usage_info
)
21300 current_function_static_stack_size
= 0;
21304 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21305 args_to_push
= crtl
->args
.pretend_args_size
;
21307 /* Compute which register we will have to save onto the stack. */
21308 offsets
= arm_get_frame_offsets ();
21309 live_regs_mask
= offsets
->saved_regs_mask
;
21311 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21313 if (IS_STACKALIGN (func_type
))
21317 /* Handle a word-aligned stack pointer. We generate the following:
21322 <save and restore r0 in normal prologue/epilogue>
21326 The unwinder doesn't need to know about the stack realignment.
21327 Just tell it we saved SP in r0. */
21328 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21330 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21331 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21333 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21334 RTX_FRAME_RELATED_P (insn
) = 1;
21335 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21337 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21339 /* ??? The CFA changes here, which may cause GDB to conclude that it
21340 has entered a different function. That said, the unwind info is
21341 correct, individually, before and after this instruction because
21342 we've described the save of SP, which will override the default
21343 handling of SP as restoring from the CFA. */
21344 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21347 /* The static chain register is the same as the IP register. If it is
21348 clobbered when creating the frame, we need to save and restore it. */
21349 clobber_ip
= IS_NESTED (func_type
)
21350 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21351 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21352 && !df_regs_ever_live_p (LR_REGNUM
)
21353 && arm_r3_live_at_start_p ()));
21355 /* Find somewhere to store IP whilst the frame is being created.
21356 We try the following places in order:
21358 1. The last argument register r3 if it is available.
21359 2. A slot on the stack above the frame if there are no
21360 arguments to push onto the stack.
21361 3. Register r3 again, after pushing the argument registers
21362 onto the stack, if this is a varargs function.
21363 4. The last slot on the stack created for the arguments to
21364 push, if this isn't a varargs function.
21366 Note - we only need to tell the dwarf2 backend about the SP
21367 adjustment in the second variant; the static chain register
21368 doesn't need to be unwound, as it doesn't contain a value
21369 inherited from the caller. */
21372 if (!arm_r3_live_at_start_p ())
21373 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21374 else if (args_to_push
== 0)
21378 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21381 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21382 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21385 /* Just tell the dwarf backend that we adjusted SP. */
21386 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21387 plus_constant (Pmode
, stack_pointer_rtx
,
21389 RTX_FRAME_RELATED_P (insn
) = 1;
21390 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21394 /* Store the args on the stack. */
21395 if (cfun
->machine
->uses_anonymous_args
)
21397 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21398 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21399 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21400 saved_pretend_args
= 1;
21406 if (args_to_push
== 4)
21407 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21409 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21410 plus_constant (Pmode
,
21414 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21416 /* Just tell the dwarf backend that we adjusted SP. */
21417 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21418 plus_constant (Pmode
, stack_pointer_rtx
,
21420 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21423 RTX_FRAME_RELATED_P (insn
) = 1;
21424 fp_offset
= args_to_push
;
21429 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21431 if (IS_INTERRUPT (func_type
))
21433 /* Interrupt functions must not corrupt any registers.
21434 Creating a frame pointer however, corrupts the IP
21435 register, so we must push it first. */
21436 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21438 /* Do not set RTX_FRAME_RELATED_P on this insn.
21439 The dwarf stack unwinding code only wants to see one
21440 stack decrement per function, and this is not it. If
21441 this instruction is labeled as being part of the frame
21442 creation sequence then dwarf2out_frame_debug_expr will
21443 die when it encounters the assignment of IP to FP
21444 later on, since the use of SP here establishes SP as
21445 the CFA register and not IP.
21447 Anyway this instruction is not really part of the stack
21448 frame creation although it is part of the prologue. */
21451 insn
= emit_set_insn (ip_rtx
,
21452 plus_constant (Pmode
, stack_pointer_rtx
,
21454 RTX_FRAME_RELATED_P (insn
) = 1;
21459 /* Push the argument registers, or reserve space for them. */
21460 if (cfun
->machine
->uses_anonymous_args
)
21461 insn
= emit_multi_reg_push
21462 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21463 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21466 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21467 GEN_INT (- args_to_push
)));
21468 RTX_FRAME_RELATED_P (insn
) = 1;
21471 /* If this is an interrupt service routine, and the link register
21472 is going to be pushed, and we're not generating extra
21473 push of IP (needed when frame is needed and frame layout if apcs),
21474 subtracting four from LR now will mean that the function return
21475 can be done with a single instruction. */
21476 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21477 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21478 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21481 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21483 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21486 if (live_regs_mask
)
21488 unsigned long dwarf_regs_mask
= live_regs_mask
;
21490 saved_regs
+= bit_count (live_regs_mask
) * 4;
21491 if (optimize_size
&& !frame_pointer_needed
21492 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21494 /* If no coprocessor registers are being pushed and we don't have
21495 to worry about a frame pointer then push extra registers to
21496 create the stack frame. This is done is a way that does not
21497 alter the frame layout, so is independent of the epilogue. */
21501 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21503 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21504 if (frame
&& n
* 4 >= frame
)
21507 live_regs_mask
|= (1 << n
) - 1;
21508 saved_regs
+= frame
;
21513 && current_tune
->prefer_ldrd_strd
21514 && !optimize_function_for_size_p (cfun
))
21516 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21518 thumb2_emit_strd_push (live_regs_mask
);
21519 else if (TARGET_ARM
21520 && !TARGET_APCS_FRAME
21521 && !IS_INTERRUPT (func_type
))
21522 arm_emit_strd_push (live_regs_mask
);
21525 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21526 RTX_FRAME_RELATED_P (insn
) = 1;
21531 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21532 RTX_FRAME_RELATED_P (insn
) = 1;
21536 if (! IS_VOLATILE (func_type
))
21537 saved_regs
+= arm_save_coproc_regs ();
21539 if (frame_pointer_needed
&& TARGET_ARM
)
21541 /* Create the new frame pointer. */
21542 if (TARGET_APCS_FRAME
)
21544 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21545 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21546 RTX_FRAME_RELATED_P (insn
) = 1;
21550 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21551 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21552 stack_pointer_rtx
, insn
));
21553 RTX_FRAME_RELATED_P (insn
) = 1;
21557 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21558 if (flag_stack_usage_info
)
21559 current_function_static_stack_size
= size
;
21561 /* If this isn't an interrupt service routine and we have a frame, then do
21562 stack checking. We use IP as the first scratch register, except for the
21563 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21564 if (!IS_INTERRUPT (func_type
)
21565 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
21567 unsigned int regno
;
21569 if (!IS_NESTED (func_type
) || clobber_ip
)
21571 else if (df_regs_ever_live_p (LR_REGNUM
))
21576 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21578 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
21579 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
21580 size
- STACK_CHECK_PROTECT
,
21581 regno
, live_regs_mask
);
21584 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
21585 regno
, live_regs_mask
);
21588 /* Recover the static chain register. */
21591 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21592 insn
= gen_rtx_REG (SImode
, 3);
21595 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21596 insn
= gen_frame_mem (SImode
, insn
);
21598 emit_set_insn (ip_rtx
, insn
);
21599 emit_insn (gen_force_register_use (ip_rtx
));
21602 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21604 /* This add can produce multiple insns for a large constant, so we
21605 need to get tricky. */
21606 rtx_insn
*last
= get_last_insn ();
21608 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21609 - offsets
->outgoing_args
);
21611 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21615 last
= last
? NEXT_INSN (last
) : get_insns ();
21616 RTX_FRAME_RELATED_P (last
) = 1;
21618 while (last
!= insn
);
21620 /* If the frame pointer is needed, emit a special barrier that
21621 will prevent the scheduler from moving stores to the frame
21622 before the stack adjustment. */
21623 if (frame_pointer_needed
)
21624 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21625 hard_frame_pointer_rtx
));
21629 if (frame_pointer_needed
&& TARGET_THUMB2
)
21630 thumb_set_frame_pointer (offsets
);
21632 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21634 unsigned long mask
;
21636 mask
= live_regs_mask
;
21637 mask
&= THUMB2_WORK_REGS
;
21638 if (!IS_NESTED (func_type
))
21639 mask
|= (1 << IP_REGNUM
);
21640 arm_load_pic_register (mask
);
21643 /* If we are profiling, make sure no instructions are scheduled before
21644 the call to mcount. Similarly if the user has requested no
21645 scheduling in the prolog. Similarly if we want non-call exceptions
21646 using the EABI unwinder, to prevent faulting instructions from being
21647 swapped with a stack adjustment. */
21648 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21649 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21650 && cfun
->can_throw_non_call_exceptions
))
21651 emit_insn (gen_blockage ());
21653 /* If the link register is being kept alive, with the return address in it,
21654 then make sure that it does not get reused by the ce2 pass. */
21655 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21656 cfun
->machine
->lr_save_eliminated
= 1;
21659 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21661 arm_print_condition (FILE *stream
)
21663 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21665 /* Branch conversion is not implemented for Thumb-2. */
21668 output_operand_lossage ("predicated Thumb instruction");
21671 if (current_insn_predicate
!= NULL
)
21673 output_operand_lossage
21674 ("predicated instruction in conditional sequence");
21678 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21680 else if (current_insn_predicate
)
21682 enum arm_cond_code code
;
21686 output_operand_lossage ("predicated Thumb instruction");
21690 code
= get_arm_condition_code (current_insn_predicate
);
21691 fputs (arm_condition_codes
[code
], stream
);
21696 /* Globally reserved letters: acln
21697 Punctuation letters currently used: @_|?().!#
21698 Lower case letters currently used: bcdefhimpqtvwxyz
21699 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21700 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21702 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21704 If CODE is 'd', then the X is a condition operand and the instruction
21705 should only be executed if the condition is true.
21706 if CODE is 'D', then the X is a condition operand and the instruction
21707 should only be executed if the condition is false: however, if the mode
21708 of the comparison is CCFPEmode, then always execute the instruction -- we
21709 do this because in these circumstances !GE does not necessarily imply LT;
21710 in these cases the instruction pattern will take care to make sure that
21711 an instruction containing %d will follow, thereby undoing the effects of
21712 doing this instruction unconditionally.
21713 If CODE is 'N' then X is a floating point operand that must be negated
21715 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21716 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21718 arm_print_operand (FILE *stream
, rtx x
, int code
)
21723 fputs (ASM_COMMENT_START
, stream
);
21727 fputs (user_label_prefix
, stream
);
21731 fputs (REGISTER_PREFIX
, stream
);
21735 arm_print_condition (stream
);
21739 /* The current condition code for a condition code setting instruction.
21740 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21741 fputc('s', stream
);
21742 arm_print_condition (stream
);
21746 /* If the instruction is conditionally executed then print
21747 the current condition code, otherwise print 's'. */
21748 gcc_assert (TARGET_THUMB2
);
21749 if (current_insn_predicate
)
21750 arm_print_condition (stream
);
21752 fputc('s', stream
);
21755 /* %# is a "break" sequence. It doesn't output anything, but is used to
21756 separate e.g. operand numbers from following text, if that text consists
21757 of further digits which we don't want to be part of the operand
21765 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
21766 fprintf (stream
, "%s", fp_const_from_val (&r
));
21770 /* An integer or symbol address without a preceding # sign. */
21772 switch (GET_CODE (x
))
21775 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21779 output_addr_const (stream
, x
);
21783 if (GET_CODE (XEXP (x
, 0)) == PLUS
21784 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21786 output_addr_const (stream
, x
);
21789 /* Fall through. */
21792 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21796 /* An integer that we want to print in HEX. */
21798 switch (GET_CODE (x
))
21801 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21805 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21810 if (CONST_INT_P (x
))
21813 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21814 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21818 putc ('~', stream
);
21819 output_addr_const (stream
, x
);
21824 /* Print the log2 of a CONST_INT. */
21828 if (!CONST_INT_P (x
)
21829 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21830 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21832 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21837 /* The low 16 bits of an immediate constant. */
21838 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21842 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21846 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21854 shift
= shift_op (x
, &val
);
21858 fprintf (stream
, ", %s ", shift
);
21860 arm_print_operand (stream
, XEXP (x
, 1), 0);
21862 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21867 /* An explanation of the 'Q', 'R' and 'H' register operands:
21869 In a pair of registers containing a DI or DF value the 'Q'
21870 operand returns the register number of the register containing
21871 the least significant part of the value. The 'R' operand returns
21872 the register number of the register containing the most
21873 significant part of the value.
21875 The 'H' operand returns the higher of the two register numbers.
21876 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21877 same as the 'Q' operand, since the most significant part of the
21878 value is held in the lower number register. The reverse is true
21879 on systems where WORDS_BIG_ENDIAN is false.
21881 The purpose of these operands is to distinguish between cases
21882 where the endian-ness of the values is important (for example
21883 when they are added together), and cases where the endian-ness
21884 is irrelevant, but the order of register operations is important.
21885 For example when loading a value from memory into a register
21886 pair, the endian-ness does not matter. Provided that the value
21887 from the lower memory address is put into the lower numbered
21888 register, and the value from the higher address is put into the
21889 higher numbered register, the load will work regardless of whether
21890 the value being loaded is big-wordian or little-wordian. The
21891 order of the two register loads can matter however, if the address
21892 of the memory location is actually held in one of the registers
21893 being overwritten by the load.
21895 The 'Q' and 'R' constraints are also available for 64-bit
21898 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21900 rtx part
= gen_lowpart (SImode
, x
);
21901 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21905 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21907 output_operand_lossage ("invalid operand for code '%c'", code
);
21911 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21915 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21917 machine_mode mode
= GET_MODE (x
);
21920 if (mode
== VOIDmode
)
21922 part
= gen_highpart_mode (SImode
, mode
, x
);
21923 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21927 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21929 output_operand_lossage ("invalid operand for code '%c'", code
);
21933 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
21937 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21939 output_operand_lossage ("invalid operand for code '%c'", code
);
21943 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21947 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21949 output_operand_lossage ("invalid operand for code '%c'", code
);
21953 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21957 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21959 output_operand_lossage ("invalid operand for code '%c'", code
);
21963 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21967 asm_fprintf (stream
, "%r",
21968 REG_P (XEXP (x
, 0))
21969 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21973 asm_fprintf (stream
, "{%r-%r}",
21975 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21978 /* Like 'M', but writing doubleword vector registers, for use by Neon
21982 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21983 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21985 asm_fprintf (stream
, "{d%d}", regno
);
21987 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21992 /* CONST_TRUE_RTX means always -- that's the default. */
21993 if (x
== const_true_rtx
)
21996 if (!COMPARISON_P (x
))
21998 output_operand_lossage ("invalid operand for code '%c'", code
);
22002 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22007 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22008 want to do that. */
22009 if (x
== const_true_rtx
)
22011 output_operand_lossage ("instruction never executed");
22014 if (!COMPARISON_P (x
))
22016 output_operand_lossage ("invalid operand for code '%c'", code
);
22020 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22021 (get_arm_condition_code (x
))],
22031 /* Former Maverick support, removed after GCC-4.7. */
22032 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22037 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22038 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22039 /* Bad value for wCG register number. */
22041 output_operand_lossage ("invalid operand for code '%c'", code
);
22046 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22049 /* Print an iWMMXt control register name. */
22051 if (!CONST_INT_P (x
)
22053 || INTVAL (x
) >= 16)
22054 /* Bad value for wC register number. */
22056 output_operand_lossage ("invalid operand for code '%c'", code
);
22062 static const char * wc_reg_names
[16] =
22064 "wCID", "wCon", "wCSSF", "wCASF",
22065 "wC4", "wC5", "wC6", "wC7",
22066 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22067 "wC12", "wC13", "wC14", "wC15"
22070 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22074 /* Print the high single-precision register of a VFP double-precision
22078 machine_mode mode
= GET_MODE (x
);
22081 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22083 output_operand_lossage ("invalid operand for code '%c'", code
);
22088 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22090 output_operand_lossage ("invalid operand for code '%c'", code
);
22094 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22098 /* Print a VFP/Neon double precision or quad precision register name. */
22102 machine_mode mode
= GET_MODE (x
);
22103 int is_quad
= (code
== 'q');
22106 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22108 output_operand_lossage ("invalid operand for code '%c'", code
);
22113 || !IS_VFP_REGNUM (REGNO (x
)))
22115 output_operand_lossage ("invalid operand for code '%c'", code
);
22120 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22121 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22123 output_operand_lossage ("invalid operand for code '%c'", code
);
22127 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22128 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22132 /* These two codes print the low/high doubleword register of a Neon quad
22133 register, respectively. For pair-structure types, can also print
22134 low/high quadword registers. */
22138 machine_mode mode
= GET_MODE (x
);
22141 if ((GET_MODE_SIZE (mode
) != 16
22142 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22144 output_operand_lossage ("invalid operand for code '%c'", code
);
22149 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22151 output_operand_lossage ("invalid operand for code '%c'", code
);
22155 if (GET_MODE_SIZE (mode
) == 16)
22156 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22157 + (code
== 'f' ? 1 : 0));
22159 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22160 + (code
== 'f' ? 1 : 0));
22164 /* Print a VFPv3 floating-point constant, represented as an integer
22168 int index
= vfp3_const_double_index (x
);
22169 gcc_assert (index
!= -1);
22170 fprintf (stream
, "%d", index
);
22174 /* Print bits representing opcode features for Neon.
22176 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22177 and polynomials as unsigned.
22179 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22181 Bit 2 is 1 for rounding functions, 0 otherwise. */
22183 /* Identify the type as 's', 'u', 'p' or 'f'. */
22186 HOST_WIDE_INT bits
= INTVAL (x
);
22187 fputc ("uspf"[bits
& 3], stream
);
22191 /* Likewise, but signed and unsigned integers are both 'i'. */
22194 HOST_WIDE_INT bits
= INTVAL (x
);
22195 fputc ("iipf"[bits
& 3], stream
);
22199 /* As for 'T', but emit 'u' instead of 'p'. */
22202 HOST_WIDE_INT bits
= INTVAL (x
);
22203 fputc ("usuf"[bits
& 3], stream
);
22207 /* Bit 2: rounding (vs none). */
22210 HOST_WIDE_INT bits
= INTVAL (x
);
22211 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22215 /* Memory operand for vld1/vst1 instruction. */
22219 bool postinc
= FALSE
;
22220 rtx postinc_reg
= NULL
;
22221 unsigned align
, memsize
, align_bits
;
22223 gcc_assert (MEM_P (x
));
22224 addr
= XEXP (x
, 0);
22225 if (GET_CODE (addr
) == POST_INC
)
22228 addr
= XEXP (addr
, 0);
22230 if (GET_CODE (addr
) == POST_MODIFY
)
22232 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22233 addr
= XEXP (addr
, 0);
22235 asm_fprintf (stream
, "[%r", REGNO (addr
));
22237 /* We know the alignment of this access, so we can emit a hint in the
22238 instruction (for some alignments) as an aid to the memory subsystem
22240 align
= MEM_ALIGN (x
) >> 3;
22241 memsize
= MEM_SIZE (x
);
22243 /* Only certain alignment specifiers are supported by the hardware. */
22244 if (memsize
== 32 && (align
% 32) == 0)
22246 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22248 else if (memsize
>= 8 && (align
% 8) == 0)
22253 if (align_bits
!= 0)
22254 asm_fprintf (stream
, ":%d", align_bits
);
22256 asm_fprintf (stream
, "]");
22259 fputs("!", stream
);
22261 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22269 gcc_assert (MEM_P (x
));
22270 addr
= XEXP (x
, 0);
22271 gcc_assert (REG_P (addr
));
22272 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22276 /* Translate an S register number into a D register number and element index. */
22279 machine_mode mode
= GET_MODE (x
);
22282 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22284 output_operand_lossage ("invalid operand for code '%c'", code
);
22289 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22291 output_operand_lossage ("invalid operand for code '%c'", code
);
22295 regno
= regno
- FIRST_VFP_REGNUM
;
22296 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22301 gcc_assert (CONST_DOUBLE_P (x
));
22303 result
= vfp3_const_double_for_fract_bits (x
);
22305 result
= vfp3_const_double_for_bits (x
);
22306 fprintf (stream
, "#%d", result
);
22309 /* Register specifier for vld1.16/vst1.16. Translate the S register
22310 number into a D register number and element index. */
22313 machine_mode mode
= GET_MODE (x
);
22316 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22318 output_operand_lossage ("invalid operand for code '%c'", code
);
22323 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22325 output_operand_lossage ("invalid operand for code '%c'", code
);
22329 regno
= regno
- FIRST_VFP_REGNUM
;
22330 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22337 output_operand_lossage ("missing operand");
22341 switch (GET_CODE (x
))
22344 asm_fprintf (stream
, "%r", REGNO (x
));
22348 output_address (GET_MODE (x
), XEXP (x
, 0));
22354 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22355 sizeof (fpstr
), 0, 1);
22356 fprintf (stream
, "#%s", fpstr
);
22361 gcc_assert (GET_CODE (x
) != NEG
);
22362 fputc ('#', stream
);
22363 if (GET_CODE (x
) == HIGH
)
22365 fputs (":lower16:", stream
);
22369 output_addr_const (stream
, x
);
22375 /* Target hook for printing a memory address. */
22377 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22381 int is_minus
= GET_CODE (x
) == MINUS
;
22384 asm_fprintf (stream
, "[%r]", REGNO (x
));
22385 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22387 rtx base
= XEXP (x
, 0);
22388 rtx index
= XEXP (x
, 1);
22389 HOST_WIDE_INT offset
= 0;
22391 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22393 /* Ensure that BASE is a register. */
22394 /* (one of them must be). */
22395 /* Also ensure the SP is not used as an index register. */
22396 std::swap (base
, index
);
22398 switch (GET_CODE (index
))
22401 offset
= INTVAL (index
);
22404 asm_fprintf (stream
, "[%r, #%wd]",
22405 REGNO (base
), offset
);
22409 asm_fprintf (stream
, "[%r, %s%r]",
22410 REGNO (base
), is_minus
? "-" : "",
22420 asm_fprintf (stream
, "[%r, %s%r",
22421 REGNO (base
), is_minus
? "-" : "",
22422 REGNO (XEXP (index
, 0)));
22423 arm_print_operand (stream
, index
, 'S');
22424 fputs ("]", stream
);
22429 gcc_unreachable ();
22432 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22433 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22435 gcc_assert (REG_P (XEXP (x
, 0)));
22437 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22438 asm_fprintf (stream
, "[%r, #%s%d]!",
22439 REGNO (XEXP (x
, 0)),
22440 GET_CODE (x
) == PRE_DEC
? "-" : "",
22441 GET_MODE_SIZE (mode
));
22443 asm_fprintf (stream
, "[%r], #%s%d",
22444 REGNO (XEXP (x
, 0)),
22445 GET_CODE (x
) == POST_DEC
? "-" : "",
22446 GET_MODE_SIZE (mode
));
22448 else if (GET_CODE (x
) == PRE_MODIFY
)
22450 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22451 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22452 asm_fprintf (stream
, "#%wd]!",
22453 INTVAL (XEXP (XEXP (x
, 1), 1)));
22455 asm_fprintf (stream
, "%r]!",
22456 REGNO (XEXP (XEXP (x
, 1), 1)));
22458 else if (GET_CODE (x
) == POST_MODIFY
)
22460 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22461 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22462 asm_fprintf (stream
, "#%wd",
22463 INTVAL (XEXP (XEXP (x
, 1), 1)));
22465 asm_fprintf (stream
, "%r",
22466 REGNO (XEXP (XEXP (x
, 1), 1)));
22468 else output_addr_const (stream
, x
);
22473 asm_fprintf (stream
, "[%r]", REGNO (x
));
22474 else if (GET_CODE (x
) == POST_INC
)
22475 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22476 else if (GET_CODE (x
) == PLUS
)
22478 gcc_assert (REG_P (XEXP (x
, 0)));
22479 if (CONST_INT_P (XEXP (x
, 1)))
22480 asm_fprintf (stream
, "[%r, #%wd]",
22481 REGNO (XEXP (x
, 0)),
22482 INTVAL (XEXP (x
, 1)));
22484 asm_fprintf (stream
, "[%r, %r]",
22485 REGNO (XEXP (x
, 0)),
22486 REGNO (XEXP (x
, 1)));
22489 output_addr_const (stream
, x
);
22493 /* Target hook for indicating whether a punctuation character for
22494 TARGET_PRINT_OPERAND is valid. */
22496 arm_print_operand_punct_valid_p (unsigned char code
)
22498 return (code
== '@' || code
== '|' || code
== '.'
22499 || code
== '(' || code
== ')' || code
== '#'
22500 || (TARGET_32BIT
&& (code
== '?'))
22501 || (TARGET_THUMB2
&& (code
== '!'))
22502 || (TARGET_THUMB
&& (code
== '_')));
22505 /* Target hook for assembling integer objects. The ARM version needs to
22506 handle word-sized values specially. */
22508 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22512 if (size
== UNITS_PER_WORD
&& aligned_p
)
22514 fputs ("\t.word\t", asm_out_file
);
22515 output_addr_const (asm_out_file
, x
);
22517 /* Mark symbols as position independent. We only do this in the
22518 .text segment, not in the .data segment. */
22519 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22520 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22522 /* See legitimize_pic_address for an explanation of the
22523 TARGET_VXWORKS_RTP check. */
22524 /* References to weak symbols cannot be resolved locally:
22525 they may be overridden by a non-weak definition at link
22527 if (!arm_pic_data_is_text_relative
22528 || (GET_CODE (x
) == SYMBOL_REF
22529 && (!SYMBOL_REF_LOCAL_P (x
)
22530 || (SYMBOL_REF_DECL (x
)
22531 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0))))
22532 fputs ("(GOT)", asm_out_file
);
22534 fputs ("(GOTOFF)", asm_out_file
);
22536 fputc ('\n', asm_out_file
);
22540 mode
= GET_MODE (x
);
22542 if (arm_vector_mode_supported_p (mode
))
22546 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22548 units
= CONST_VECTOR_NUNITS (x
);
22549 size
= GET_MODE_UNIT_SIZE (mode
);
22551 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22552 for (i
= 0; i
< units
; i
++)
22554 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22556 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22559 for (i
= 0; i
< units
; i
++)
22561 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22563 (*CONST_DOUBLE_REAL_VALUE (elt
), GET_MODE_INNER (mode
),
22564 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22570 return default_assemble_integer (x
, size
, aligned_p
);
22574 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22578 if (!TARGET_AAPCS_BASED
)
22581 default_named_section_asm_out_constructor
22582 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22586 /* Put these in the .init_array section, using a special relocation. */
22587 if (priority
!= DEFAULT_INIT_PRIORITY
)
22590 sprintf (buf
, "%s.%.5u",
22591 is_ctor
? ".init_array" : ".fini_array",
22593 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22600 switch_to_section (s
);
22601 assemble_align (POINTER_SIZE
);
22602 fputs ("\t.word\t", asm_out_file
);
22603 output_addr_const (asm_out_file
, symbol
);
22604 fputs ("(target1)\n", asm_out_file
);
22607 /* Add a function to the list of static constructors. */
22610 arm_elf_asm_constructor (rtx symbol
, int priority
)
22612 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22615 /* Add a function to the list of static destructors. */
22618 arm_elf_asm_destructor (rtx symbol
, int priority
)
22620 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22623 /* A finite state machine takes care of noticing whether or not instructions
22624 can be conditionally executed, and thus decrease execution time and code
22625 size by deleting branch instructions. The fsm is controlled by
22626 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22628 /* The state of the fsm controlling condition codes are:
22629 0: normal, do nothing special
22630 1: make ASM_OUTPUT_OPCODE not output this instruction
22631 2: make ASM_OUTPUT_OPCODE not output this instruction
22632 3: make instructions conditional
22633 4: make instructions conditional
22635 State transitions (state->state by whom under condition):
22636 0 -> 1 final_prescan_insn if the `target' is a label
22637 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22638 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22639 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22640 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22641 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22642 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22643 (the target insn is arm_target_insn).
22645 If the jump clobbers the conditions then we use states 2 and 4.
22647 A similar thing can be done with conditional return insns.
22649 XXX In case the `target' is an unconditional branch, this conditionalising
22650 of the instructions always reduces code size, but not always execution
22651 time. But then, I want to reduce the code size to somewhere near what
22652 /bin/cc produces. */
22654 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22655 instructions. When a COND_EXEC instruction is seen the subsequent
22656 instructions are scanned so that multiple conditional instructions can be
22657 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22658 specify the length and true/false mask for the IT block. These will be
22659 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22661 /* Returns the index of the ARM condition code string in
22662 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22663 COMPARISON should be an rtx like `(eq (...) (...))'. */
22666 maybe_get_arm_condition_code (rtx comparison
)
22668 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22669 enum arm_cond_code code
;
22670 enum rtx_code comp_code
= GET_CODE (comparison
);
22672 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22673 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22674 XEXP (comparison
, 1));
22678 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22679 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22680 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22681 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22682 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22683 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22684 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22685 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22686 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22687 case CC_DLTUmode
: code
= ARM_CC
;
22690 if (comp_code
== EQ
)
22691 return ARM_INVERSE_CONDITION_CODE (code
);
22692 if (comp_code
== NE
)
22699 case NE
: return ARM_NE
;
22700 case EQ
: return ARM_EQ
;
22701 case GE
: return ARM_PL
;
22702 case LT
: return ARM_MI
;
22703 default: return ARM_NV
;
22709 case NE
: return ARM_NE
;
22710 case EQ
: return ARM_EQ
;
22711 default: return ARM_NV
;
22717 case NE
: return ARM_MI
;
22718 case EQ
: return ARM_PL
;
22719 default: return ARM_NV
;
22724 /* We can handle all cases except UNEQ and LTGT. */
22727 case GE
: return ARM_GE
;
22728 case GT
: return ARM_GT
;
22729 case LE
: return ARM_LS
;
22730 case LT
: return ARM_MI
;
22731 case NE
: return ARM_NE
;
22732 case EQ
: return ARM_EQ
;
22733 case ORDERED
: return ARM_VC
;
22734 case UNORDERED
: return ARM_VS
;
22735 case UNLT
: return ARM_LT
;
22736 case UNLE
: return ARM_LE
;
22737 case UNGT
: return ARM_HI
;
22738 case UNGE
: return ARM_PL
;
22739 /* UNEQ and LTGT do not have a representation. */
22740 case UNEQ
: /* Fall through. */
22741 case LTGT
: /* Fall through. */
22742 default: return ARM_NV
;
22748 case NE
: return ARM_NE
;
22749 case EQ
: return ARM_EQ
;
22750 case GE
: return ARM_LE
;
22751 case GT
: return ARM_LT
;
22752 case LE
: return ARM_GE
;
22753 case LT
: return ARM_GT
;
22754 case GEU
: return ARM_LS
;
22755 case GTU
: return ARM_CC
;
22756 case LEU
: return ARM_CS
;
22757 case LTU
: return ARM_HI
;
22758 default: return ARM_NV
;
22764 case LTU
: return ARM_CS
;
22765 case GEU
: return ARM_CC
;
22766 case NE
: return ARM_CS
;
22767 case EQ
: return ARM_CC
;
22768 default: return ARM_NV
;
22774 case NE
: return ARM_NE
;
22775 case EQ
: return ARM_EQ
;
22776 case GEU
: return ARM_CS
;
22777 case GTU
: return ARM_HI
;
22778 case LEU
: return ARM_LS
;
22779 case LTU
: return ARM_CC
;
22780 default: return ARM_NV
;
22786 case GE
: return ARM_GE
;
22787 case LT
: return ARM_LT
;
22788 case GEU
: return ARM_CS
;
22789 case LTU
: return ARM_CC
;
22790 default: return ARM_NV
;
22796 case NE
: return ARM_VS
;
22797 case EQ
: return ARM_VC
;
22798 default: return ARM_NV
;
22804 case NE
: return ARM_NE
;
22805 case EQ
: return ARM_EQ
;
22806 case GE
: return ARM_GE
;
22807 case GT
: return ARM_GT
;
22808 case LE
: return ARM_LE
;
22809 case LT
: return ARM_LT
;
22810 case GEU
: return ARM_CS
;
22811 case GTU
: return ARM_HI
;
22812 case LEU
: return ARM_LS
;
22813 case LTU
: return ARM_CC
;
22814 default: return ARM_NV
;
22817 default: gcc_unreachable ();
22821 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22822 static enum arm_cond_code
22823 get_arm_condition_code (rtx comparison
)
22825 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22826 gcc_assert (code
!= ARM_NV
);
22830 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22833 thumb2_final_prescan_insn (rtx_insn
*insn
)
22835 rtx_insn
*first_insn
= insn
;
22836 rtx body
= PATTERN (insn
);
22838 enum arm_cond_code code
;
22843 /* max_insns_skipped in the tune was already taken into account in the
22844 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22845 just emit the IT blocks as we can. It does not make sense to split
22847 max
= MAX_INSN_PER_IT_BLOCK
;
22849 /* Remove the previous insn from the count of insns to be output. */
22850 if (arm_condexec_count
)
22851 arm_condexec_count
--;
22853 /* Nothing to do if we are already inside a conditional block. */
22854 if (arm_condexec_count
)
22857 if (GET_CODE (body
) != COND_EXEC
)
22860 /* Conditional jumps are implemented directly. */
22864 predicate
= COND_EXEC_TEST (body
);
22865 arm_current_cc
= get_arm_condition_code (predicate
);
22867 n
= get_attr_ce_count (insn
);
22868 arm_condexec_count
= 1;
22869 arm_condexec_mask
= (1 << n
) - 1;
22870 arm_condexec_masklen
= n
;
22871 /* See if subsequent instructions can be combined into the same block. */
22874 insn
= next_nonnote_insn (insn
);
22876 /* Jumping into the middle of an IT block is illegal, so a label or
22877 barrier terminates the block. */
22878 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22881 body
= PATTERN (insn
);
22882 /* USE and CLOBBER aren't really insns, so just skip them. */
22883 if (GET_CODE (body
) == USE
22884 || GET_CODE (body
) == CLOBBER
)
22887 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22888 if (GET_CODE (body
) != COND_EXEC
)
22890 /* Maximum number of conditionally executed instructions in a block. */
22891 n
= get_attr_ce_count (insn
);
22892 if (arm_condexec_masklen
+ n
> max
)
22895 predicate
= COND_EXEC_TEST (body
);
22896 code
= get_arm_condition_code (predicate
);
22897 mask
= (1 << n
) - 1;
22898 if (arm_current_cc
== code
)
22899 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22900 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22903 arm_condexec_count
++;
22904 arm_condexec_masklen
+= n
;
22906 /* A jump must be the last instruction in a conditional block. */
22910 /* Restore recog_data (getting the attributes of other insns can
22911 destroy this array, but final.c assumes that it remains intact
22912 across this call). */
22913 extract_constrain_insn_cached (first_insn
);
22917 arm_final_prescan_insn (rtx_insn
*insn
)
22919 /* BODY will hold the body of INSN. */
22920 rtx body
= PATTERN (insn
);
22922 /* This will be 1 if trying to repeat the trick, and things need to be
22923 reversed if it appears to fail. */
22926 /* If we start with a return insn, we only succeed if we find another one. */
22927 int seeking_return
= 0;
22928 enum rtx_code return_code
= UNKNOWN
;
22930 /* START_INSN will hold the insn from where we start looking. This is the
22931 first insn after the following code_label if REVERSE is true. */
22932 rtx_insn
*start_insn
= insn
;
22934 /* If in state 4, check if the target branch is reached, in order to
22935 change back to state 0. */
22936 if (arm_ccfsm_state
== 4)
22938 if (insn
== arm_target_insn
)
22940 arm_target_insn
= NULL
;
22941 arm_ccfsm_state
= 0;
22946 /* If in state 3, it is possible to repeat the trick, if this insn is an
22947 unconditional branch to a label, and immediately following this branch
22948 is the previous target label which is only used once, and the label this
22949 branch jumps to is not too far off. */
22950 if (arm_ccfsm_state
== 3)
22952 if (simplejump_p (insn
))
22954 start_insn
= next_nonnote_insn (start_insn
);
22955 if (BARRIER_P (start_insn
))
22957 /* XXX Isn't this always a barrier? */
22958 start_insn
= next_nonnote_insn (start_insn
);
22960 if (LABEL_P (start_insn
)
22961 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22962 && LABEL_NUSES (start_insn
) == 1)
22967 else if (ANY_RETURN_P (body
))
22969 start_insn
= next_nonnote_insn (start_insn
);
22970 if (BARRIER_P (start_insn
))
22971 start_insn
= next_nonnote_insn (start_insn
);
22972 if (LABEL_P (start_insn
)
22973 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22974 && LABEL_NUSES (start_insn
) == 1)
22977 seeking_return
= 1;
22978 return_code
= GET_CODE (body
);
22987 gcc_assert (!arm_ccfsm_state
|| reverse
);
22988 if (!JUMP_P (insn
))
22991 /* This jump might be paralleled with a clobber of the condition codes
22992 the jump should always come first */
22993 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22994 body
= XVECEXP (body
, 0, 0);
22997 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22998 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23001 int fail
= FALSE
, succeed
= FALSE
;
23002 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23003 int then_not_else
= TRUE
;
23004 rtx_insn
*this_insn
= start_insn
;
23007 /* Register the insn jumped to. */
23010 if (!seeking_return
)
23011 label
= XEXP (SET_SRC (body
), 0);
23013 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23014 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23015 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23017 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23018 then_not_else
= FALSE
;
23020 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23022 seeking_return
= 1;
23023 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23025 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23027 seeking_return
= 1;
23028 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23029 then_not_else
= FALSE
;
23032 gcc_unreachable ();
23034 /* See how many insns this branch skips, and what kind of insns. If all
23035 insns are okay, and the label or unconditional branch to the same
23036 label is not too far away, succeed. */
23037 for (insns_skipped
= 0;
23038 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23042 this_insn
= next_nonnote_insn (this_insn
);
23046 switch (GET_CODE (this_insn
))
23049 /* Succeed if it is the target label, otherwise fail since
23050 control falls in from somewhere else. */
23051 if (this_insn
== label
)
23053 arm_ccfsm_state
= 1;
23061 /* Succeed if the following insn is the target label.
23063 If return insns are used then the last insn in a function
23064 will be a barrier. */
23065 this_insn
= next_nonnote_insn (this_insn
);
23066 if (this_insn
&& this_insn
== label
)
23068 arm_ccfsm_state
= 1;
23076 /* The AAPCS says that conditional calls should not be
23077 used since they make interworking inefficient (the
23078 linker can't transform BL<cond> into BLX). That's
23079 only a problem if the machine has BLX. */
23086 /* Succeed if the following insn is the target label, or
23087 if the following two insns are a barrier and the
23089 this_insn
= next_nonnote_insn (this_insn
);
23090 if (this_insn
&& BARRIER_P (this_insn
))
23091 this_insn
= next_nonnote_insn (this_insn
);
23093 if (this_insn
&& this_insn
== label
23094 && insns_skipped
< max_insns_skipped
)
23096 arm_ccfsm_state
= 1;
23104 /* If this is an unconditional branch to the same label, succeed.
23105 If it is to another label, do nothing. If it is conditional,
23107 /* XXX Probably, the tests for SET and the PC are
23110 scanbody
= PATTERN (this_insn
);
23111 if (GET_CODE (scanbody
) == SET
23112 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23114 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23115 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23117 arm_ccfsm_state
= 2;
23120 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23123 /* Fail if a conditional return is undesirable (e.g. on a
23124 StrongARM), but still allow this if optimizing for size. */
23125 else if (GET_CODE (scanbody
) == return_code
23126 && !use_return_insn (TRUE
, NULL
)
23129 else if (GET_CODE (scanbody
) == return_code
)
23131 arm_ccfsm_state
= 2;
23134 else if (GET_CODE (scanbody
) == PARALLEL
)
23136 switch (get_attr_conds (this_insn
))
23146 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23151 /* Instructions using or affecting the condition codes make it
23153 scanbody
= PATTERN (this_insn
);
23154 if (!(GET_CODE (scanbody
) == SET
23155 || GET_CODE (scanbody
) == PARALLEL
)
23156 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23166 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23167 arm_target_label
= CODE_LABEL_NUMBER (label
);
23170 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23172 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23174 this_insn
= next_nonnote_insn (this_insn
);
23175 gcc_assert (!this_insn
23176 || (!BARRIER_P (this_insn
)
23177 && !LABEL_P (this_insn
)));
23181 /* Oh, dear! we ran off the end.. give up. */
23182 extract_constrain_insn_cached (insn
);
23183 arm_ccfsm_state
= 0;
23184 arm_target_insn
= NULL
;
23187 arm_target_insn
= this_insn
;
23190 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23193 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23195 if (reverse
|| then_not_else
)
23196 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23199 /* Restore recog_data (getting the attributes of other insns can
23200 destroy this array, but final.c assumes that it remains intact
23201 across this call. */
23202 extract_constrain_insn_cached (insn
);
23206 /* Output IT instructions. */
23208 thumb2_asm_output_opcode (FILE * stream
)
23213 if (arm_condexec_mask
)
23215 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23216 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23218 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23219 arm_condition_codes
[arm_current_cc
]);
23220 arm_condexec_mask
= 0;
23224 /* Returns true if REGNO is a valid register
23225 for holding a quantity of type MODE. */
23227 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23229 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23230 return (regno
== CC_REGNUM
23231 || (TARGET_HARD_FLOAT
23232 && regno
== VFPCC_REGNUM
));
23234 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23238 /* For the Thumb we only allow values bigger than SImode in
23239 registers 0 - 6, so that there is always a second low
23240 register available to hold the upper part of the value.
23241 We probably we ought to ensure that the register is the
23242 start of an even numbered register pair. */
23243 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23245 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
23247 if (mode
== SFmode
|| mode
== SImode
)
23248 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23250 if (mode
== DFmode
)
23251 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23253 if (mode
== HFmode
)
23254 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23256 /* VFP registers can hold HImode values. */
23257 if (mode
== HImode
)
23258 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23261 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23262 || (VALID_NEON_QREG_MODE (mode
)
23263 && NEON_REGNO_OK_FOR_QUAD (regno
))
23264 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23265 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23266 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23267 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23268 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23273 if (TARGET_REALLY_IWMMXT
)
23275 if (IS_IWMMXT_GR_REGNUM (regno
))
23276 return mode
== SImode
;
23278 if (IS_IWMMXT_REGNUM (regno
))
23279 return VALID_IWMMXT_REG_MODE (mode
);
23282 /* We allow almost any value to be stored in the general registers.
23283 Restrict doubleword quantities to even register pairs in ARM state
23284 so that we can use ldrd. Do not allow very large Neon structure
23285 opaque modes in general registers; they would use too many. */
23286 if (regno
<= LAST_ARM_REGNUM
)
23288 if (ARM_NUM_REGS (mode
) > 4)
23294 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23297 if (regno
== FRAME_POINTER_REGNUM
23298 || regno
== ARG_POINTER_REGNUM
)
23299 /* We only allow integers in the fake hard registers. */
23300 return GET_MODE_CLASS (mode
) == MODE_INT
;
23305 /* Implement MODES_TIEABLE_P. */
23308 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23310 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23313 /* We specifically want to allow elements of "structure" modes to
23314 be tieable to the structure. This more general condition allows
23315 other rarer situations too. */
23317 && (VALID_NEON_DREG_MODE (mode1
)
23318 || VALID_NEON_QREG_MODE (mode1
)
23319 || VALID_NEON_STRUCT_MODE (mode1
))
23320 && (VALID_NEON_DREG_MODE (mode2
)
23321 || VALID_NEON_QREG_MODE (mode2
)
23322 || VALID_NEON_STRUCT_MODE (mode2
)))
23328 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23329 not used in arm mode. */
23332 arm_regno_class (int regno
)
23334 if (regno
== PC_REGNUM
)
23339 if (regno
== STACK_POINTER_REGNUM
)
23341 if (regno
== CC_REGNUM
)
23348 if (TARGET_THUMB2
&& regno
< 8)
23351 if ( regno
<= LAST_ARM_REGNUM
23352 || regno
== FRAME_POINTER_REGNUM
23353 || regno
== ARG_POINTER_REGNUM
)
23354 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23356 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23357 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23359 if (IS_VFP_REGNUM (regno
))
23361 if (regno
<= D7_VFP_REGNUM
)
23362 return VFP_D0_D7_REGS
;
23363 else if (regno
<= LAST_LO_VFP_REGNUM
)
23364 return VFP_LO_REGS
;
23366 return VFP_HI_REGS
;
23369 if (IS_IWMMXT_REGNUM (regno
))
23370 return IWMMXT_REGS
;
23372 if (IS_IWMMXT_GR_REGNUM (regno
))
23373 return IWMMXT_GR_REGS
;
23378 /* Handle a special case when computing the offset
23379 of an argument from the frame pointer. */
23381 arm_debugger_arg_offset (int value
, rtx addr
)
23385 /* We are only interested if dbxout_parms() failed to compute the offset. */
23389 /* We can only cope with the case where the address is held in a register. */
23393 /* If we are using the frame pointer to point at the argument, then
23394 an offset of 0 is correct. */
23395 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23398 /* If we are using the stack pointer to point at the
23399 argument, then an offset of 0 is correct. */
23400 /* ??? Check this is consistent with thumb2 frame layout. */
23401 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23402 && REGNO (addr
) == SP_REGNUM
)
23405 /* Oh dear. The argument is pointed to by a register rather
23406 than being held in a register, or being stored at a known
23407 offset from the frame pointer. Since GDB only understands
23408 those two kinds of argument we must translate the address
23409 held in the register into an offset from the frame pointer.
23410 We do this by searching through the insns for the function
23411 looking to see where this register gets its value. If the
23412 register is initialized from the frame pointer plus an offset
23413 then we are in luck and we can continue, otherwise we give up.
23415 This code is exercised by producing debugging information
23416 for a function with arguments like this:
23418 double func (double a, double b, int c, double d) {return d;}
23420 Without this code the stab for parameter 'd' will be set to
23421 an offset of 0 from the frame pointer, rather than 8. */
23423 /* The if() statement says:
23425 If the insn is a normal instruction
23426 and if the insn is setting the value in a register
23427 and if the register being set is the register holding the address of the argument
23428 and if the address is computing by an addition
23429 that involves adding to a register
23430 which is the frame pointer
23435 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23437 if ( NONJUMP_INSN_P (insn
)
23438 && GET_CODE (PATTERN (insn
)) == SET
23439 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23440 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23441 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23442 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23443 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23446 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23455 warning (0, "unable to compute real location of stacked parameter");
23456 value
= 8; /* XXX magic hack */
23462 /* Implement TARGET_PROMOTED_TYPE. */
23465 arm_promoted_type (const_tree t
)
23467 if (SCALAR_FLOAT_TYPE_P (t
)
23468 && TYPE_PRECISION (t
) == 16
23469 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
23470 return float_type_node
;
23474 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23475 This simply adds HFmode as a supported mode; even though we don't
23476 implement arithmetic on this type directly, it's supported by
23477 optabs conversions, much the way the double-word arithmetic is
23478 special-cased in the default hook. */
23481 arm_scalar_mode_supported_p (machine_mode mode
)
23483 if (mode
== HFmode
)
23484 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23485 else if (ALL_FIXED_POINT_MODE_P (mode
))
23488 return default_scalar_mode_supported_p (mode
);
23491 /* Set the value of FLT_EVAL_METHOD.
23492 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23494 0: evaluate all operations and constants, whose semantic type has at
23495 most the range and precision of type float, to the range and
23496 precision of float; evaluate all other operations and constants to
23497 the range and precision of the semantic type;
23499 N, where _FloatN is a supported interchange floating type
23500 evaluate all operations and constants, whose semantic type has at
23501 most the range and precision of _FloatN type, to the range and
23502 precision of the _FloatN type; evaluate all other operations and
23503 constants to the range and precision of the semantic type;
23505 If we have the ARMv8.2-A extensions then we support _Float16 in native
23506 precision, so we should set this to 16. Otherwise, we support the type,
23507 but want to evaluate expressions in float precision, so set this to
23510 static enum flt_eval_method
23511 arm_excess_precision (enum excess_precision_type type
)
23515 case EXCESS_PRECISION_TYPE_FAST
:
23516 case EXCESS_PRECISION_TYPE_STANDARD
:
23517 /* We can calculate either in 16-bit range and precision or
23518 32-bit range and precision. Make that decision based on whether
23519 we have native support for the ARMv8.2-A 16-bit floating-point
23520 instructions or not. */
23521 return (TARGET_VFP_FP16INST
23522 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23523 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
23524 case EXCESS_PRECISION_TYPE_IMPLICIT
:
23525 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
23527 gcc_unreachable ();
23529 return FLT_EVAL_METHOD_UNPREDICTABLE
;
23533 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23534 _Float16 if we are using anything other than ieee format for 16-bit
23535 floating point. Otherwise, punt to the default implementation. */
23536 static machine_mode
23537 arm_floatn_mode (int n
, bool extended
)
23539 if (!extended
&& n
== 16)
23540 return arm_fp16_format
== ARM_FP16_FORMAT_IEEE
? HFmode
: VOIDmode
;
23542 return default_floatn_mode (n
, extended
);
23546 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23547 not to early-clobber SRC registers in the process.
23549 We assume that the operands described by SRC and DEST represent a
23550 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23551 number of components into which the copy has been decomposed. */
23553 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23557 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23558 || REGNO (operands
[0]) < REGNO (operands
[1]))
23560 for (i
= 0; i
< count
; i
++)
23562 operands
[2 * i
] = dest
[i
];
23563 operands
[2 * i
+ 1] = src
[i
];
23568 for (i
= 0; i
< count
; i
++)
23570 operands
[2 * i
] = dest
[count
- i
- 1];
23571 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23576 /* Split operands into moves from op[1] + op[2] into op[0]. */
23579 neon_split_vcombine (rtx operands
[3])
23581 unsigned int dest
= REGNO (operands
[0]);
23582 unsigned int src1
= REGNO (operands
[1]);
23583 unsigned int src2
= REGNO (operands
[2]);
23584 machine_mode halfmode
= GET_MODE (operands
[1]);
23585 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
23586 rtx destlo
, desthi
;
23588 if (src1
== dest
&& src2
== dest
+ halfregs
)
23590 /* No-op move. Can't split to nothing; emit something. */
23591 emit_note (NOTE_INSN_DELETED
);
23595 /* Preserve register attributes for variable tracking. */
23596 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23597 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23598 GET_MODE_SIZE (halfmode
));
23600 /* Special case of reversed high/low parts. Use VSWP. */
23601 if (src2
== dest
&& src1
== dest
+ halfregs
)
23603 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23604 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23605 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23609 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23611 /* Try to avoid unnecessary moves if part of the result
23612 is in the right place already. */
23614 emit_move_insn (destlo
, operands
[1]);
23615 if (src2
!= dest
+ halfregs
)
23616 emit_move_insn (desthi
, operands
[2]);
23620 if (src2
!= dest
+ halfregs
)
23621 emit_move_insn (desthi
, operands
[2]);
23623 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
23636 /* Like emit_multi_reg_push, but allowing for a different set of
23637 registers to be described as saved. MASK is the set of registers
23638 to be saved; REAL_REGS is the set of registers to be described as
23639 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23642 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23644 unsigned long regno
;
23645 rtx par
[10], tmp
, reg
;
23649 /* Build the parallel of the registers actually being stored. */
23650 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23652 regno
= ctz_hwi (mask
);
23653 reg
= gen_rtx_REG (SImode
, regno
);
23656 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23658 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23663 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23664 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23665 tmp
= gen_frame_mem (BLKmode
, tmp
);
23666 tmp
= gen_rtx_SET (tmp
, par
[0]);
23669 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23670 insn
= emit_insn (tmp
);
23672 /* Always build the stack adjustment note for unwind info. */
23673 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23674 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23677 /* Build the parallel of the registers recorded as saved for unwind. */
23678 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23680 regno
= ctz_hwi (real_regs
);
23681 reg
= gen_rtx_REG (SImode
, regno
);
23683 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23684 tmp
= gen_frame_mem (SImode
, tmp
);
23685 tmp
= gen_rtx_SET (tmp
, reg
);
23686 RTX_FRAME_RELATED_P (tmp
) = 1;
23694 RTX_FRAME_RELATED_P (par
[0]) = 1;
23695 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23698 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23703 /* Emit code to push or pop registers to or from the stack. F is the
23704 assembly file. MASK is the registers to pop. */
23706 thumb_pop (FILE *f
, unsigned long mask
)
23709 int lo_mask
= mask
& 0xFF;
23710 int pushed_words
= 0;
23714 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23716 /* Special case. Do not generate a POP PC statement here, do it in
23718 thumb_exit (f
, -1);
23722 fprintf (f
, "\tpop\t{");
23724 /* Look at the low registers first. */
23725 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23729 asm_fprintf (f
, "%r", regno
);
23731 if ((lo_mask
& ~1) != 0)
23738 if (mask
& (1 << PC_REGNUM
))
23740 /* Catch popping the PC. */
23741 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
23742 || IS_CMSE_ENTRY (arm_current_func_type ()))
23744 /* The PC is never poped directly, instead
23745 it is popped into r3 and then BX is used. */
23746 fprintf (f
, "}\n");
23748 thumb_exit (f
, -1);
23757 asm_fprintf (f
, "%r", PC_REGNUM
);
23761 fprintf (f
, "}\n");
23764 /* Generate code to return from a thumb function.
23765 If 'reg_containing_return_addr' is -1, then the return address is
23766 actually on the stack, at the stack pointer. */
23768 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23770 unsigned regs_available_for_popping
;
23771 unsigned regs_to_pop
;
23773 unsigned available
;
23777 int restore_a4
= FALSE
;
23779 /* Compute the registers we need to pop. */
23783 if (reg_containing_return_addr
== -1)
23785 regs_to_pop
|= 1 << LR_REGNUM
;
23789 if (TARGET_BACKTRACE
)
23791 /* Restore the (ARM) frame pointer and stack pointer. */
23792 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23796 /* If there is nothing to pop then just emit the BX instruction and
23798 if (pops_needed
== 0)
23800 if (crtl
->calls_eh_return
)
23801 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23803 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23805 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
23806 reg_containing_return_addr
);
23807 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
23810 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23813 /* Otherwise if we are not supporting interworking and we have not created
23814 a backtrace structure and the function was not entered in ARM mode then
23815 just pop the return address straight into the PC. */
23816 else if (!TARGET_INTERWORK
23817 && !TARGET_BACKTRACE
23818 && !is_called_in_ARM_mode (current_function_decl
)
23819 && !crtl
->calls_eh_return
23820 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23822 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23826 /* Find out how many of the (return) argument registers we can corrupt. */
23827 regs_available_for_popping
= 0;
23829 /* If returning via __builtin_eh_return, the bottom three registers
23830 all contain information needed for the return. */
23831 if (crtl
->calls_eh_return
)
23835 /* If we can deduce the registers used from the function's
23836 return value. This is more reliable that examining
23837 df_regs_ever_live_p () because that will be set if the register is
23838 ever used in the function, not just if the register is used
23839 to hold a return value. */
23841 if (crtl
->return_rtx
!= 0)
23842 mode
= GET_MODE (crtl
->return_rtx
);
23844 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23846 size
= GET_MODE_SIZE (mode
);
23850 /* In a void function we can use any argument register.
23851 In a function that returns a structure on the stack
23852 we can use the second and third argument registers. */
23853 if (mode
== VOIDmode
)
23854 regs_available_for_popping
=
23855 (1 << ARG_REGISTER (1))
23856 | (1 << ARG_REGISTER (2))
23857 | (1 << ARG_REGISTER (3));
23859 regs_available_for_popping
=
23860 (1 << ARG_REGISTER (2))
23861 | (1 << ARG_REGISTER (3));
23863 else if (size
<= 4)
23864 regs_available_for_popping
=
23865 (1 << ARG_REGISTER (2))
23866 | (1 << ARG_REGISTER (3));
23867 else if (size
<= 8)
23868 regs_available_for_popping
=
23869 (1 << ARG_REGISTER (3));
23872 /* Match registers to be popped with registers into which we pop them. */
23873 for (available
= regs_available_for_popping
,
23874 required
= regs_to_pop
;
23875 required
!= 0 && available
!= 0;
23876 available
&= ~(available
& - available
),
23877 required
&= ~(required
& - required
))
23880 /* If we have any popping registers left over, remove them. */
23882 regs_available_for_popping
&= ~available
;
23884 /* Otherwise if we need another popping register we can use
23885 the fourth argument register. */
23886 else if (pops_needed
)
23888 /* If we have not found any free argument registers and
23889 reg a4 contains the return address, we must move it. */
23890 if (regs_available_for_popping
== 0
23891 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
23893 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23894 reg_containing_return_addr
= LR_REGNUM
;
23896 else if (size
> 12)
23898 /* Register a4 is being used to hold part of the return value,
23899 but we have dire need of a free, low register. */
23902 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
23905 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
23907 /* The fourth argument register is available. */
23908 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
23914 /* Pop as many registers as we can. */
23915 thumb_pop (f
, regs_available_for_popping
);
23917 /* Process the registers we popped. */
23918 if (reg_containing_return_addr
== -1)
23920 /* The return address was popped into the lowest numbered register. */
23921 regs_to_pop
&= ~(1 << LR_REGNUM
);
23923 reg_containing_return_addr
=
23924 number_of_first_bit_set (regs_available_for_popping
);
23926 /* Remove this register for the mask of available registers, so that
23927 the return address will not be corrupted by further pops. */
23928 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
23931 /* If we popped other registers then handle them here. */
23932 if (regs_available_for_popping
)
23936 /* Work out which register currently contains the frame pointer. */
23937 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
23939 /* Move it into the correct place. */
23940 asm_fprintf (f
, "\tmov\t%r, %r\n",
23941 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
23943 /* (Temporarily) remove it from the mask of popped registers. */
23944 regs_available_for_popping
&= ~(1 << frame_pointer
);
23945 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
23947 if (regs_available_for_popping
)
23951 /* We popped the stack pointer as well,
23952 find the register that contains it. */
23953 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
23955 /* Move it into the stack register. */
23956 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
23958 /* At this point we have popped all necessary registers, so
23959 do not worry about restoring regs_available_for_popping
23960 to its correct value:
23962 assert (pops_needed == 0)
23963 assert (regs_available_for_popping == (1 << frame_pointer))
23964 assert (regs_to_pop == (1 << STACK_POINTER)) */
23968 /* Since we have just move the popped value into the frame
23969 pointer, the popping register is available for reuse, and
23970 we know that we still have the stack pointer left to pop. */
23971 regs_available_for_popping
|= (1 << frame_pointer
);
23975 /* If we still have registers left on the stack, but we no longer have
23976 any registers into which we can pop them, then we must move the return
23977 address into the link register and make available the register that
23979 if (regs_available_for_popping
== 0 && pops_needed
> 0)
23981 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
23983 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
23984 reg_containing_return_addr
);
23986 reg_containing_return_addr
= LR_REGNUM
;
23989 /* If we have registers left on the stack then pop some more.
23990 We know that at most we will want to pop FP and SP. */
23991 if (pops_needed
> 0)
23996 thumb_pop (f
, regs_available_for_popping
);
23998 /* We have popped either FP or SP.
23999 Move whichever one it is into the correct register. */
24000 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24001 move_to
= number_of_first_bit_set (regs_to_pop
);
24003 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24005 regs_to_pop
&= ~(1 << move_to
);
24010 /* If we still have not popped everything then we must have only
24011 had one register available to us and we are now popping the SP. */
24012 if (pops_needed
> 0)
24016 thumb_pop (f
, regs_available_for_popping
);
24018 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24020 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24022 assert (regs_to_pop == (1 << STACK_POINTER))
24023 assert (pops_needed == 1)
24027 /* If necessary restore the a4 register. */
24030 if (reg_containing_return_addr
!= LR_REGNUM
)
24032 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24033 reg_containing_return_addr
= LR_REGNUM
;
24036 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24039 if (crtl
->calls_eh_return
)
24040 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24042 /* Return to caller. */
24043 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24045 /* This is for the cases where LR is not being used to contain the return
24046 address. It may therefore contain information that we might not want
24047 to leak, hence it must be cleared. The value in R0 will never be a
24048 secret at this point, so it is safe to use it, see the clearing code
24049 in 'cmse_nonsecure_entry_clear_before_return'. */
24050 if (reg_containing_return_addr
!= LR_REGNUM
)
24051 asm_fprintf (f
, "\tmov\tlr, r0\n");
24053 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
24054 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24057 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24060 /* Scan INSN just before assembler is output for it.
24061 For Thumb-1, we track the status of the condition codes; this
24062 information is used in the cbranchsi4_insn pattern. */
24064 thumb1_final_prescan_insn (rtx_insn
*insn
)
24066 if (flag_print_asm_name
)
24067 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24068 INSN_ADDRESSES (INSN_UID (insn
)));
24069 /* Don't overwrite the previous setter when we get to a cbranch. */
24070 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24072 enum attr_conds conds
;
24074 if (cfun
->machine
->thumb1_cc_insn
)
24076 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24077 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24080 conds
= get_attr_conds (insn
);
24081 if (conds
== CONDS_SET
)
24083 rtx set
= single_set (insn
);
24084 cfun
->machine
->thumb1_cc_insn
= insn
;
24085 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24086 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24087 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24088 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24090 rtx src1
= XEXP (SET_SRC (set
), 1);
24091 if (src1
== const0_rtx
)
24092 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24094 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24096 /* Record the src register operand instead of dest because
24097 cprop_hardreg pass propagates src. */
24098 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24101 else if (conds
!= CONDS_NOCOND
)
24102 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24105 /* Check if unexpected far jump is used. */
24106 if (cfun
->machine
->lr_save_eliminated
24107 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24108 internal_error("Unexpected thumb1 far jump");
24112 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24114 unsigned HOST_WIDE_INT mask
= 0xff;
24117 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24118 if (val
== 0) /* XXX */
24121 for (i
= 0; i
< 25; i
++)
24122 if ((val
& (mask
<< i
)) == val
)
24128 /* Returns nonzero if the current function contains,
24129 or might contain a far jump. */
24131 thumb_far_jump_used_p (void)
24134 bool far_jump
= false;
24135 unsigned int func_size
= 0;
24137 /* If we have already decided that far jumps may be used,
24138 do not bother checking again, and always return true even if
24139 it turns out that they are not being used. Once we have made
24140 the decision that far jumps are present (and that hence the link
24141 register will be pushed onto the stack) we cannot go back on it. */
24142 if (cfun
->machine
->far_jump_used
)
24145 /* If this function is not being called from the prologue/epilogue
24146 generation code then it must be being called from the
24147 INITIAL_ELIMINATION_OFFSET macro. */
24148 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24150 /* In this case we know that we are being asked about the elimination
24151 of the arg pointer register. If that register is not being used,
24152 then there are no arguments on the stack, and we do not have to
24153 worry that a far jump might force the prologue to push the link
24154 register, changing the stack offsets. In this case we can just
24155 return false, since the presence of far jumps in the function will
24156 not affect stack offsets.
24158 If the arg pointer is live (or if it was live, but has now been
24159 eliminated and so set to dead) then we do have to test to see if
24160 the function might contain a far jump. This test can lead to some
24161 false negatives, since before reload is completed, then length of
24162 branch instructions is not known, so gcc defaults to returning their
24163 longest length, which in turn sets the far jump attribute to true.
24165 A false negative will not result in bad code being generated, but it
24166 will result in a needless push and pop of the link register. We
24167 hope that this does not occur too often.
24169 If we need doubleword stack alignment this could affect the other
24170 elimination offsets so we can't risk getting it wrong. */
24171 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24172 cfun
->machine
->arg_pointer_live
= 1;
24173 else if (!cfun
->machine
->arg_pointer_live
)
24177 /* We should not change far_jump_used during or after reload, as there is
24178 no chance to change stack frame layout. */
24179 if (reload_in_progress
|| reload_completed
)
24182 /* Check to see if the function contains a branch
24183 insn with the far jump attribute set. */
24184 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24186 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24190 func_size
+= get_attr_length (insn
);
24193 /* Attribute far_jump will always be true for thumb1 before
24194 shorten_branch pass. So checking far_jump attribute before
24195 shorten_branch isn't much useful.
24197 Following heuristic tries to estimate more accurately if a far jump
24198 may finally be used. The heuristic is very conservative as there is
24199 no chance to roll-back the decision of not to use far jump.
24201 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24202 2-byte insn is associated with a 4 byte constant pool. Using
24203 function size 2048/3 as the threshold is conservative enough. */
24206 if ((func_size
* 3) >= 2048)
24208 /* Record the fact that we have decided that
24209 the function does use far jumps. */
24210 cfun
->machine
->far_jump_used
= 1;
24218 /* Return nonzero if FUNC must be entered in ARM mode. */
24220 is_called_in_ARM_mode (tree func
)
24222 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24224 /* Ignore the problem about functions whose address is taken. */
24225 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24229 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24235 /* Given the stack offsets and register mask in OFFSETS, decide how
24236 many additional registers to push instead of subtracting a constant
24237 from SP. For epilogues the principle is the same except we use pop.
24238 FOR_PROLOGUE indicates which we're generating. */
24240 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24242 HOST_WIDE_INT amount
;
24243 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24244 /* Extract a mask of the ones we can give to the Thumb's push/pop
24246 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24247 /* Then count how many other high registers will need to be pushed. */
24248 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24249 int n_free
, reg_base
, size
;
24251 if (!for_prologue
&& frame_pointer_needed
)
24252 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24254 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24256 /* If the stack frame size is 512 exactly, we can save one load
24257 instruction, which should make this a win even when optimizing
24259 if (!optimize_size
&& amount
!= 512)
24262 /* Can't do this if there are high registers to push. */
24263 if (high_regs_pushed
!= 0)
24266 /* Shouldn't do it in the prologue if no registers would normally
24267 be pushed at all. In the epilogue, also allow it if we'll have
24268 a pop insn for the PC. */
24271 || TARGET_BACKTRACE
24272 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24273 || TARGET_INTERWORK
24274 || crtl
->args
.pretend_args_size
!= 0))
24277 /* Don't do this if thumb_expand_prologue wants to emit instructions
24278 between the push and the stack frame allocation. */
24280 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24281 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24288 size
= arm_size_return_regs ();
24289 reg_base
= ARM_NUM_INTS (size
);
24290 live_regs_mask
>>= reg_base
;
24293 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24294 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24296 live_regs_mask
>>= 1;
24302 gcc_assert (amount
/ 4 * 4 == amount
);
24304 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24305 return (amount
- 508) / 4;
24306 if (amount
<= n_free
* 4)
24311 /* The bits which aren't usefully expanded as rtl. */
24313 thumb1_unexpanded_epilogue (void)
24315 arm_stack_offsets
*offsets
;
24317 unsigned long live_regs_mask
= 0;
24318 int high_regs_pushed
= 0;
24320 int had_to_push_lr
;
24323 if (cfun
->machine
->return_used_this_function
!= 0)
24326 if (IS_NAKED (arm_current_func_type ()))
24329 offsets
= arm_get_frame_offsets ();
24330 live_regs_mask
= offsets
->saved_regs_mask
;
24331 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24333 /* If we can deduce the registers used from the function's return value.
24334 This is more reliable that examining df_regs_ever_live_p () because that
24335 will be set if the register is ever used in the function, not just if
24336 the register is used to hold a return value. */
24337 size
= arm_size_return_regs ();
24339 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24342 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24343 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24346 /* The prolog may have pushed some high registers to use as
24347 work registers. e.g. the testsuite file:
24348 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24349 compiles to produce:
24350 push {r4, r5, r6, r7, lr}
24354 as part of the prolog. We have to undo that pushing here. */
24356 if (high_regs_pushed
)
24358 unsigned long mask
= live_regs_mask
& 0xff;
24361 /* The available low registers depend on the size of the value we are
24369 /* Oh dear! We have no low registers into which we can pop
24372 ("no low registers available for popping high registers");
24374 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24375 if (live_regs_mask
& (1 << next_hi_reg
))
24378 while (high_regs_pushed
)
24380 /* Find lo register(s) into which the high register(s) can
24382 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24384 if (mask
& (1 << regno
))
24385 high_regs_pushed
--;
24386 if (high_regs_pushed
== 0)
24390 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24392 /* Pop the values into the low register(s). */
24393 thumb_pop (asm_out_file
, mask
);
24395 /* Move the value(s) into the high registers. */
24396 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24398 if (mask
& (1 << regno
))
24400 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24403 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24404 if (live_regs_mask
& (1 << next_hi_reg
))
24409 live_regs_mask
&= ~0x0f00;
24412 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24413 live_regs_mask
&= 0xff;
24415 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24417 /* Pop the return address into the PC. */
24418 if (had_to_push_lr
)
24419 live_regs_mask
|= 1 << PC_REGNUM
;
24421 /* Either no argument registers were pushed or a backtrace
24422 structure was created which includes an adjusted stack
24423 pointer, so just pop everything. */
24424 if (live_regs_mask
)
24425 thumb_pop (asm_out_file
, live_regs_mask
);
24427 /* We have either just popped the return address into the
24428 PC or it is was kept in LR for the entire function.
24429 Note that thumb_pop has already called thumb_exit if the
24430 PC was in the list. */
24431 if (!had_to_push_lr
)
24432 thumb_exit (asm_out_file
, LR_REGNUM
);
24436 /* Pop everything but the return address. */
24437 if (live_regs_mask
)
24438 thumb_pop (asm_out_file
, live_regs_mask
);
24440 if (had_to_push_lr
)
24444 /* We have no free low regs, so save one. */
24445 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24449 /* Get the return address into a temporary register. */
24450 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24454 /* Move the return address to lr. */
24455 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24457 /* Restore the low register. */
24458 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24463 regno
= LAST_ARG_REGNUM
;
24468 /* Remove the argument registers that were pushed onto the stack. */
24469 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24470 SP_REGNUM
, SP_REGNUM
,
24471 crtl
->args
.pretend_args_size
);
24473 thumb_exit (asm_out_file
, regno
);
24479 /* Functions to save and restore machine-specific function data. */
24480 static struct machine_function
*
24481 arm_init_machine_status (void)
24483 struct machine_function
*machine
;
24484 machine
= ggc_cleared_alloc
<machine_function
> ();
24486 #if ARM_FT_UNKNOWN != 0
24487 machine
->func_type
= ARM_FT_UNKNOWN
;
24492 /* Return an RTX indicating where the return address to the
24493 calling function can be found. */
24495 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24500 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24503 /* Do anything needed before RTL is emitted for each function. */
24505 arm_init_expanders (void)
24507 /* Arrange to initialize and mark the machine per-function status. */
24508 init_machine_status
= arm_init_machine_status
;
24510 /* This is to stop the combine pass optimizing away the alignment
24511 adjustment of va_arg. */
24512 /* ??? It is claimed that this should not be necessary. */
24514 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
24517 /* Check that FUNC is called with a different mode. */
24520 arm_change_mode_p (tree func
)
24522 if (TREE_CODE (func
) != FUNCTION_DECL
)
24525 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24528 callee_tree
= target_option_default_node
;
24530 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24531 int flags
= callee_opts
->x_target_flags
;
24533 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
24536 /* Like arm_compute_initial_elimination offset. Simpler because there
24537 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24538 to point at the base of the local variables after static stack
24539 space for a function has been allocated. */
24542 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24544 arm_stack_offsets
*offsets
;
24546 offsets
= arm_get_frame_offsets ();
24550 case ARG_POINTER_REGNUM
:
24553 case STACK_POINTER_REGNUM
:
24554 return offsets
->outgoing_args
- offsets
->saved_args
;
24556 case FRAME_POINTER_REGNUM
:
24557 return offsets
->soft_frame
- offsets
->saved_args
;
24559 case ARM_HARD_FRAME_POINTER_REGNUM
:
24560 return offsets
->saved_regs
- offsets
->saved_args
;
24562 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24563 return offsets
->locals_base
- offsets
->saved_args
;
24566 gcc_unreachable ();
24570 case FRAME_POINTER_REGNUM
:
24573 case STACK_POINTER_REGNUM
:
24574 return offsets
->outgoing_args
- offsets
->soft_frame
;
24576 case ARM_HARD_FRAME_POINTER_REGNUM
:
24577 return offsets
->saved_regs
- offsets
->soft_frame
;
24579 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24580 return offsets
->locals_base
- offsets
->soft_frame
;
24583 gcc_unreachable ();
24588 gcc_unreachable ();
24592 /* Generate the function's prologue. */
24595 thumb1_expand_prologue (void)
24599 HOST_WIDE_INT amount
;
24600 HOST_WIDE_INT size
;
24601 arm_stack_offsets
*offsets
;
24602 unsigned long func_type
;
24604 unsigned long live_regs_mask
;
24605 unsigned long l_mask
;
24606 unsigned high_regs_pushed
= 0;
24607 bool lr_needs_saving
;
24609 func_type
= arm_current_func_type ();
24611 /* Naked functions don't have prologues. */
24612 if (IS_NAKED (func_type
))
24614 if (flag_stack_usage_info
)
24615 current_function_static_stack_size
= 0;
24619 if (IS_INTERRUPT (func_type
))
24621 error ("interrupt Service Routines cannot be coded in Thumb mode");
24625 if (is_called_in_ARM_mode (current_function_decl
))
24626 emit_insn (gen_prologue_thumb1_interwork ());
24628 offsets
= arm_get_frame_offsets ();
24629 live_regs_mask
= offsets
->saved_regs_mask
;
24630 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
24632 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24633 l_mask
= live_regs_mask
& 0x40ff;
24634 /* Then count how many other high registers will need to be pushed. */
24635 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24637 if (crtl
->args
.pretend_args_size
)
24639 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24641 if (cfun
->machine
->uses_anonymous_args
)
24643 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24644 unsigned long mask
;
24646 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24647 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24649 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24653 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24654 stack_pointer_rtx
, x
));
24656 RTX_FRAME_RELATED_P (insn
) = 1;
24659 if (TARGET_BACKTRACE
)
24661 HOST_WIDE_INT offset
= 0;
24662 unsigned work_register
;
24663 rtx work_reg
, x
, arm_hfp_rtx
;
24665 /* We have been asked to create a stack backtrace structure.
24666 The code looks like this:
24670 0 sub SP, #16 Reserve space for 4 registers.
24671 2 push {R7} Push low registers.
24672 4 add R7, SP, #20 Get the stack pointer before the push.
24673 6 str R7, [SP, #8] Store the stack pointer
24674 (before reserving the space).
24675 8 mov R7, PC Get hold of the start of this code + 12.
24676 10 str R7, [SP, #16] Store it.
24677 12 mov R7, FP Get hold of the current frame pointer.
24678 14 str R7, [SP, #4] Store it.
24679 16 mov R7, LR Get hold of the current return address.
24680 18 str R7, [SP, #12] Store it.
24681 20 add R7, SP, #16 Point at the start of the
24682 backtrace structure.
24683 22 mov FP, R7 Put this value into the frame pointer. */
24685 work_register
= thumb_find_work_register (live_regs_mask
);
24686 work_reg
= gen_rtx_REG (SImode
, work_register
);
24687 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24689 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24690 stack_pointer_rtx
, GEN_INT (-16)));
24691 RTX_FRAME_RELATED_P (insn
) = 1;
24695 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24696 RTX_FRAME_RELATED_P (insn
) = 1;
24697 lr_needs_saving
= false;
24699 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24702 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24703 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24705 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24706 x
= gen_frame_mem (SImode
, x
);
24707 emit_move_insn (x
, work_reg
);
24709 /* Make sure that the instruction fetching the PC is in the right place
24710 to calculate "start of backtrace creation code + 12". */
24711 /* ??? The stores using the common WORK_REG ought to be enough to
24712 prevent the scheduler from doing anything weird. Failing that
24713 we could always move all of the following into an UNSPEC_VOLATILE. */
24716 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24717 emit_move_insn (work_reg
, x
);
24719 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24720 x
= gen_frame_mem (SImode
, x
);
24721 emit_move_insn (x
, work_reg
);
24723 emit_move_insn (work_reg
, arm_hfp_rtx
);
24725 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24726 x
= gen_frame_mem (SImode
, x
);
24727 emit_move_insn (x
, work_reg
);
24731 emit_move_insn (work_reg
, arm_hfp_rtx
);
24733 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24734 x
= gen_frame_mem (SImode
, x
);
24735 emit_move_insn (x
, work_reg
);
24737 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24738 emit_move_insn (work_reg
, x
);
24740 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24741 x
= gen_frame_mem (SImode
, x
);
24742 emit_move_insn (x
, work_reg
);
24745 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24746 emit_move_insn (work_reg
, x
);
24748 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24749 x
= gen_frame_mem (SImode
, x
);
24750 emit_move_insn (x
, work_reg
);
24752 x
= GEN_INT (offset
+ 12);
24753 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24755 emit_move_insn (arm_hfp_rtx
, work_reg
);
24757 /* Optimization: If we are not pushing any low registers but we are going
24758 to push some high registers then delay our first push. This will just
24759 be a push of LR and we can combine it with the push of the first high
24761 else if ((l_mask
& 0xff) != 0
24762 || (high_regs_pushed
== 0 && lr_needs_saving
))
24764 unsigned long mask
= l_mask
;
24765 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24766 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24767 RTX_FRAME_RELATED_P (insn
) = 1;
24768 lr_needs_saving
= false;
24771 if (high_regs_pushed
)
24773 unsigned pushable_regs
;
24774 unsigned next_hi_reg
;
24775 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24776 : crtl
->args
.info
.nregs
;
24777 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24779 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24780 if (live_regs_mask
& (1 << next_hi_reg
))
24783 /* Here we need to mask out registers used for passing arguments
24784 even if they can be pushed. This is to avoid using them to stash the high
24785 registers. Such kind of stash may clobber the use of arguments. */
24786 pushable_regs
= l_mask
& (~arg_regs_mask
);
24787 if (lr_needs_saving
)
24788 pushable_regs
&= ~(1 << LR_REGNUM
);
24790 if (pushable_regs
== 0)
24791 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24793 while (high_regs_pushed
> 0)
24795 unsigned long real_regs_mask
= 0;
24796 unsigned long push_mask
= 0;
24798 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
24800 if (pushable_regs
& (1 << regno
))
24802 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24803 gen_rtx_REG (SImode
, next_hi_reg
));
24805 high_regs_pushed
--;
24806 real_regs_mask
|= (1 << next_hi_reg
);
24807 push_mask
|= (1 << regno
);
24809 if (high_regs_pushed
)
24811 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24813 if (live_regs_mask
& (1 << next_hi_reg
))
24821 /* If we had to find a work register and we have not yet
24822 saved the LR then add it to the list of regs to push. */
24823 if (lr_needs_saving
)
24825 push_mask
|= 1 << LR_REGNUM
;
24826 real_regs_mask
|= 1 << LR_REGNUM
;
24827 lr_needs_saving
= false;
24830 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
24831 RTX_FRAME_RELATED_P (insn
) = 1;
24835 /* Load the pic register before setting the frame pointer,
24836 so we can use r7 as a temporary work register. */
24837 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24838 arm_load_pic_register (live_regs_mask
);
24840 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24841 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24842 stack_pointer_rtx
);
24844 size
= offsets
->outgoing_args
- offsets
->saved_args
;
24845 if (flag_stack_usage_info
)
24846 current_function_static_stack_size
= size
;
24848 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24849 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
24850 sorry ("-fstack-check=specific for Thumb-1");
24852 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24853 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
24858 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24859 GEN_INT (- amount
)));
24860 RTX_FRAME_RELATED_P (insn
) = 1;
24866 /* The stack decrement is too big for an immediate value in a single
24867 insn. In theory we could issue multiple subtracts, but after
24868 three of them it becomes more space efficient to place the full
24869 value in the constant pool and load into a register. (Also the
24870 ARM debugger really likes to see only one stack decrement per
24871 function). So instead we look for a scratch register into which
24872 we can load the decrement, and then we subtract this from the
24873 stack pointer. Unfortunately on the thumb the only available
24874 scratch registers are the argument registers, and we cannot use
24875 these as they may hold arguments to the function. Instead we
24876 attempt to locate a call preserved register which is used by this
24877 function. If we can find one, then we know that it will have
24878 been pushed at the start of the prologue and so we can corrupt
24880 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
24881 if (live_regs_mask
& (1 << regno
))
24884 gcc_assert(regno
<= LAST_LO_REGNUM
);
24886 reg
= gen_rtx_REG (SImode
, regno
);
24888 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
24890 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24891 stack_pointer_rtx
, reg
));
24893 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
24894 plus_constant (Pmode
, stack_pointer_rtx
,
24896 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
24897 RTX_FRAME_RELATED_P (insn
) = 1;
24901 if (frame_pointer_needed
)
24902 thumb_set_frame_pointer (offsets
);
24904 /* If we are profiling, make sure no instructions are scheduled before
24905 the call to mcount. Similarly if the user has requested no
24906 scheduling in the prolog. Similarly if we want non-call exceptions
24907 using the EABI unwinder, to prevent faulting instructions from being
24908 swapped with a stack adjustment. */
24909 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
24910 || (arm_except_unwind_info (&global_options
) == UI_TARGET
24911 && cfun
->can_throw_non_call_exceptions
))
24912 emit_insn (gen_blockage ());
24914 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
24915 if (live_regs_mask
& 0xff)
24916 cfun
->machine
->lr_save_eliminated
= 0;
24919 /* Clear caller saved registers not used to pass return values and leaked
24920 condition flags before exiting a cmse_nonsecure_entry function. */
24923 cmse_nonsecure_entry_clear_before_return (void)
24925 uint64_t to_clear_mask
[2];
24926 uint32_t padding_bits_to_clear
= 0;
24927 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
;
24928 int regno
, maxregno
= IP_REGNUM
;
24932 to_clear_mask
[0] = (1ULL << (NUM_ARG_REGS
)) - 1;
24933 to_clear_mask
[0] |= (1ULL << IP_REGNUM
);
24935 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
24936 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
24937 to make sure the instructions used to clear them are present. */
24938 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
24940 uint64_t float_mask
= (1ULL << (D7_VFP_REGNUM
+ 1)) - 1;
24941 maxregno
= LAST_VFP_REGNUM
;
24943 float_mask
&= ~((1ULL << FIRST_VFP_REGNUM
) - 1);
24944 to_clear_mask
[0] |= float_mask
;
24946 float_mask
= (1ULL << (maxregno
- 63)) - 1;
24947 to_clear_mask
[1] = float_mask
;
24949 /* Make sure we don't clear the two scratch registers used to clear the
24950 relevant FPSCR bits in output_return_instruction. */
24951 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
24952 to_clear_mask
[0] &= ~(1ULL << IP_REGNUM
);
24953 emit_use (gen_rtx_REG (SImode
, 4));
24954 to_clear_mask
[0] &= ~(1ULL << 4);
24957 /* If the user has defined registers to be caller saved, these are no longer
24958 restored by the function before returning and must thus be cleared for
24959 security purposes. */
24960 for (regno
= NUM_ARG_REGS
; regno
< LAST_VFP_REGNUM
; regno
++)
24962 /* We do not touch registers that can be used to pass arguments as per
24963 the AAPCS, since these should never be made callee-saved by user
24965 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
24967 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
24969 if (call_used_regs
[regno
])
24970 to_clear_mask
[regno
/ 64] |= (1ULL << (regno
% 64));
24973 /* Make sure we do not clear the registers used to return the result in. */
24974 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
24975 if (!VOID_TYPE_P (result_type
))
24977 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
24979 /* No need to check that we return in registers, because we don't
24980 support returning on stack yet. */
24982 &= ~compute_not_to_clear_mask (result_type
, result_rtl
, 0,
24983 padding_bits_to_clear_ptr
);
24986 if (padding_bits_to_clear
!= 0)
24989 /* Padding bits to clear is not 0 so we know we are dealing with
24990 returning a composite type, which only uses r0. Let's make sure that
24991 r1-r3 is cleared too, we will use r1 as a scratch register. */
24992 gcc_assert ((to_clear_mask
[0] & 0xe) == 0xe);
24994 reg_rtx
= gen_rtx_REG (SImode
, R1_REGNUM
);
24996 /* Fill the lower half of the negated padding_bits_to_clear. */
24997 emit_move_insn (reg_rtx
,
24998 GEN_INT ((((~padding_bits_to_clear
) << 16u) >> 16u)));
25000 /* Also fill the top half of the negated padding_bits_to_clear. */
25001 if (((~padding_bits_to_clear
) >> 16) > 0)
25002 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg_rtx
,
25005 GEN_INT ((~padding_bits_to_clear
) >> 16)));
25007 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, R0_REGNUM
),
25008 gen_rtx_REG (SImode
, R0_REGNUM
),
25012 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
25014 if (!(to_clear_mask
[regno
/ 64] & (1ULL << (regno
% 64))))
25017 if (IS_VFP_REGNUM (regno
))
25019 /* If regno is an even vfp register and its successor is also to
25020 be cleared, use vmov. */
25021 if (TARGET_VFP_DOUBLE
25022 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
25023 && to_clear_mask
[regno
/ 64] & (1ULL << ((regno
% 64) + 1)))
25025 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
25026 CONST1_RTX (DFmode
));
25027 emit_use (gen_rtx_REG (DFmode
, regno
));
25032 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
25033 CONST1_RTX (SFmode
));
25034 emit_use (gen_rtx_REG (SFmode
, regno
));
25041 if (regno
== R0_REGNUM
)
25042 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25045 /* R0 has either been cleared before, see code above, or it
25046 holds a return value, either way it is not secret
25048 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25049 gen_rtx_REG (SImode
, R0_REGNUM
));
25050 emit_use (gen_rtx_REG (SImode
, regno
));
25054 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25055 gen_rtx_REG (SImode
, LR_REGNUM
));
25056 emit_use (gen_rtx_REG (SImode
, regno
));
/* NOTE(review): this extraction is missing interior lines (braces, local
   declarations such as `i`/`num_regs`, `else` arms, return type).  Comments
   below describe only the visible logic — verify against the full arm.c.  */
25062 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25063 POP instruction can be generated. LR should be replaced by PC. All
25064 the checks required are already done by USE_RETURN_INSN (). Hence,
25065 all we really need to check here is if single register is to be
25066 returned, or multiple register return. */
25068 thumb2_expand_return (bool simple_return
)
25071 unsigned long saved_regs_mask
;
25072 arm_stack_offsets
*offsets
;
25074 offsets
= arm_get_frame_offsets ();
25075 saved_regs_mask
= offsets
->saved_regs_mask
;
/* Count how many core registers were saved by the prologue.  */
25077 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25078 if (saved_regs_mask
& (1 << i
))
25081 if (!simple_return
&& saved_regs_mask
)
25083 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25084 functions or adapt code to handle according to ACLE. This path should
25085 not be reachable for cmse_nonsecure_entry functions though we prefer
25086 to assert it for now to ensure that future code changes do not silently
25087 change this behavior. */
25088 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
/* Single-register case (presumably guarded by a missing `num_regs == 1`
   test — TODO confirm): build a PARALLEL of [return; PC = [SP], SP += 4]
   so one post-increment POP both restores and returns.  */
25091 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25092 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25093 rtx addr
= gen_rtx_MEM (SImode
,
25094 gen_rtx_POST_INC (SImode
,
25095 stack_pointer_rtx
));
25096 set_mem_alias_set (addr
, get_frame_alias_set ());
25097 XVECEXP (par
, 0, 0) = ret_rtx
;
25098 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25099 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25100 emit_jump_insn (par
);
/* Multi-register case: pop straight into PC instead of LR so the
   multi-reg pop itself performs the return.  */
25104 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25105 saved_regs_mask
|= (1 << PC_REGNUM
);
25106 arm_emit_multi_reg_pop (saved_regs_mask
);
/* Simple-return path: for CMSE entry functions, scrub caller-saved
   registers before returning to non-secure code.  */
25111 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25112 cmse_nonsecure_entry_clear_before_return ();
25113 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): extraction is missing interior lines (braces, `int regno;`,
   conditions around the constant-vs-register stack adjustment).  Comments
   describe visible logic only — verify against the full arm.c.  */
/* Emit RTL for the Thumb-1 epilogue: restore SP, keep the adjustment alive,
   and fix up register liveness for the post-reload passes.  */
25118 thumb1_expand_epilogue (void)
25120 HOST_WIDE_INT amount
;
25121 arm_stack_offsets
*offsets
;
25124 /* Naked functions don't have prologues. */
25125 if (IS_NAKED (arm_current_func_type ()))
25128 offsets
= arm_get_frame_offsets ();
25129 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25131 if (frame_pointer_needed
)
25133 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25134 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25136 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25138 gcc_assert (amount
>= 0);
25141 emit_insn (gen_blockage ());
/* Small adjustment: add the constant directly to SP.  */
25144 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25145 GEN_INT (amount
)));
25148 /* r3 is always free in the epilogue. */
25149 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25151 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25152 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25156 /* Emit a USE (stack_pointer_rtx), so that
25157 the stack adjustment will not be deleted. */
25158 emit_insn (gen_force_register_use (stack_pointer_rtx
))
;
25160 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25161 emit_insn (gen_blockage ());
25163 /* Emit a clobber for each insn that will be restored in the epilogue,
25164 so that flow2 will get register lifetimes correct. */
25165 for (regno
= 0; regno
< 13; regno
++)
25166 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25167 emit_clobber (gen_rtx_REG (SImode
, regno
));
25169 if (! df_regs_ever_live_p (LR_REGNUM
))
25170 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25172 /* Clear all caller-saved regs that are not used to return. */
25173 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25174 cmse_nonsecure_entry_clear_before_return ();
/* NOTE(review): extraction drops interior lines (braces, declarations of
   `i`, `num_regs`, `start_reg`, `insn`, parts of conditions, REG_NOTES
   chain arguments).  Comments describe visible logic only — verify against
   the full arm.c.  */
25177 /* Epilogue code for APCS frame. */
25179 arm_expand_epilogue_apcs_frame (bool really_return
)
25181 unsigned long func_type
;
25182 unsigned long saved_regs_mask
;
25185 int floats_from_frame
= 0;
25186 arm_stack_offsets
*offsets
;
/* Only valid for an ARM-mode APCS frame with a frame pointer.  */
25188 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25189 func_type
= arm_current_func_type ();
25191 /* Get frame offsets for ARM. */
25192 offsets
= arm_get_frame_offsets ();
25193 saved_regs_mask
= offsets
->saved_regs_mask
;
25195 /* Find the offset of the floating-point save area in the frame. */
25197 = (offsets
->saved_args
25198 + arm_compute_static_chain_stack_bytes ()
25201 /* Compute how many core registers saved and how far away the floats are. */
25202 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25203 if (saved_regs_mask
& (1 << i
))
25206 floats_from_frame
+= 4;
25209 if (TARGET_HARD_FLOAT
)
25212 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25214 /* The offset is from IP_REGNUM. */
25215 int saved_size
= arm_get_vfp_saved_size ();
25216 if (saved_size
> 0)
25219 floats_from_frame
+= saved_size
;
/* Point IP at the bottom of the VFP save area, with a CFA note so
   unwind info tracks the adjustment.  */
25220 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25221 hard_frame_pointer_rtx
,
25222 GEN_INT (-floats_from_frame
)));
25223 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25224 ip_rtx
, hard_frame_pointer_rtx
);
25227 /* Generate VFP register multi-pop. */
25228 start_reg
= FIRST_VFP_REGNUM
;
25230 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25231 /* Look for a case where a reg does not need restoring. */
25232 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25233 && (!df_regs_ever_live_p (i
+ 1)
25234 || call_used_regs
[i
+ 1]))
25236 if (start_reg
!= i
)
25237 arm_emit_vfp_multi_reg_pop (start_reg
,
25238 (i
- start_reg
) / 2,
25239 gen_rtx_REG (SImode
,
25244 /* Restore the remaining regs that we have discovered (or possibly
25245 even all of them, if the conditional in the for loop never
25247 if (start_reg
!= i
)
25248 arm_emit_vfp_multi_reg_pop (start_reg
,
25249 (i
- start_reg
) / 2,
25250 gen_rtx_REG (SImode
, IP_REGNUM
));
25255 /* The frame pointer is guaranteed to be non-double-word aligned, as
25256 it is set to double-word-aligned old_stack_pointer - 4. */
25258 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
/* Restore iWMMXt registers from the frame, walking downwards.  */
25260 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25261 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25263 rtx addr
= gen_frame_mem (V2SImode
,
25264 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25266 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25267 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25268 gen_rtx_REG (V2SImode
, i
),
25274 /* saved_regs_mask should contain IP which contains old stack pointer
25275 at the time of activation creation. Since SP and IP are adjacent registers,
25276 we can restore the value directly into SP. */
25277 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25278 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25279 saved_regs_mask
|= (1 << SP_REGNUM
);
25281 /* There are two registers left in saved_regs_mask - LR and PC. We
25282 only need to restore LR (the return address), but to
25283 save time we can load it directly into PC, unless we need a
25284 special function exit sequence, or we are not really returning. */
25286 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25287 && !crtl
->calls_eh_return
)
25288 /* Delete LR from the register mask, so that LR on
25289 the stack is loaded into the PC in the register mask. */
25290 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25292 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25294 num_regs
= bit_count (saved_regs_mask
);
25295 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25298 emit_insn (gen_blockage ());
25299 /* Unwind the stack to just below the saved registers. */
25300 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25301 hard_frame_pointer_rtx
,
25302 GEN_INT (- 4 * num_regs
)));
25304 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25305 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25308 arm_emit_multi_reg_pop (saved_regs_mask
);
25310 if (IS_INTERRUPT (func_type
))
25312 /* Interrupt handlers will have pushed the
25313 IP onto the stack, so restore it now. */
25315 rtx addr
= gen_rtx_MEM (SImode
,
25316 gen_rtx_POST_INC (SImode
,
25317 stack_pointer_rtx
));
25318 set_mem_alias_set (addr
, get_frame_alias_set ());
25319 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25320 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25321 gen_rtx_REG (SImode
, IP_REGNUM
),
/* If we popped into PC above, the return already happened.  */
25325 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25328 if (crtl
->calls_eh_return
)
25329 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25331 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25333 if (IS_STACKALIGN (func_type
))
25334 /* Restore the original stack pointer. Before prologue, the stack was
25335 realigned and the original stack pointer saved in r0. For details,
25336 see comment in arm_expand_prologue. */
25337 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25339 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): extraction drops interior lines (braces, declarations of
   `i`, `j`, `amount`, `insn`, `tmp`, `num_regs`, mode guards such as
   TARGET_ARM/TARGET_THUMB2 tests, `else` keywords, and trailing arguments
   of several calls).  Comments describe visible logic only — verify against
   the full arm.c.  */
25342 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25343 function is not a sibcall. */
25345 arm_expand_epilogue (bool really_return
)
25347 unsigned long func_type
;
25348 unsigned long saved_regs_mask
;
25352 arm_stack_offsets
*offsets
;
25354 func_type
= arm_current_func_type ();
25356 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25357 let output_return_instruction take care of instruction emission if any. */
25358 if (IS_NAKED (func_type
)
25359 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25362 emit_jump_insn (simple_return_rtx
);
25366 /* If we are throwing an exception, then we really must be doing a
25367 return, so we can't tail-call. */
25368 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
/* APCS frames have their own dedicated epilogue expander.  */
25370 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25372 arm_expand_epilogue_apcs_frame (really_return
);
25376 /* Get frame offsets for ARM. */
25377 offsets
= arm_get_frame_offsets ();
25378 saved_regs_mask
= offsets
->saved_regs_mask
;
25379 num_regs
= bit_count (saved_regs_mask
);
25381 if (frame_pointer_needed
)
25384 /* Restore stack pointer if necessary. */
25387 /* In ARM mode, frame pointer points to first saved register.
25388 Restore stack pointer to last saved register. */
25389 amount
= offsets
->frame
- offsets
->saved_regs
;
25391 /* Force out any pending memory operations that reference stacked data
25392 before stack de-allocation occurs. */
25393 emit_insn (gen_blockage ());
25394 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25395 hard_frame_pointer_rtx
,
25396 GEN_INT (amount
)));
25397 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25399 hard_frame_pointer_rtx
);
25401 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25403 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25407 /* In Thumb-2 mode, the frame pointer points to the last saved
25409 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25412 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25413 hard_frame_pointer_rtx
,
25414 GEN_INT (amount
)));
25415 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25416 hard_frame_pointer_rtx
,
25417 hard_frame_pointer_rtx
);
25420 /* Force out any pending memory operations that reference stacked data
25421 before stack de-allocation occurs. */
25422 emit_insn (gen_blockage ());
25423 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25424 hard_frame_pointer_rtx
));
25425 arm_add_cfa_adjust_cfa_note (insn
, 0,
25427 hard_frame_pointer_rtx
);
25428 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25430 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25435 /* Pop off outgoing args and local frame to adjust stack pointer to
25436 last saved register. */
25437 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25441 /* Force out any pending memory operations that reference stacked data
25442 before stack de-allocation occurs. */
25443 emit_insn (gen_blockage ());
25444 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25446 GEN_INT (amount
)));
25447 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25448 stack_pointer_rtx
, stack_pointer_rtx
);
25449 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25451 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25455 if (TARGET_HARD_FLOAT
)
25457 /* Generate VFP register multi-pop. */
25458 int end_reg
= LAST_VFP_REGNUM
+ 1;
25460 /* Scan the registers in reverse order. We need to match
25461 any groupings made in the prologue and generate matching
25462 vldm operations. The need to match groups is because,
25463 unlike pop, vldm can only do consecutive regs. */
25464 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25465 /* Look for a case where a reg does not need restoring. */
25466 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25467 && (!df_regs_ever_live_p (i
+ 1)
25468 || call_used_regs
[i
+ 1]))
25470 /* Restore the regs discovered so far (from reg+2 to
25472 if (end_reg
> i
+ 2)
25473 arm_emit_vfp_multi_reg_pop (i
+ 2,
25474 (end_reg
- (i
+ 2)) / 2,
25475 stack_pointer_rtx
);
25479 /* Restore the remaining regs that we have discovered (or possibly
25480 even all of them, if the conditional in the for loop never
25482 if (end_reg
> i
+ 2)
25483 arm_emit_vfp_multi_reg_pop (i
+ 2,
25484 (end_reg
- (i
+ 2)) / 2,
25485 stack_pointer_rtx
);
/* Restore iWMMXt registers with post-increment loads from SP.  */
25489 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25490 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25493 rtx addr
= gen_rtx_MEM (V2SImode
,
25494 gen_rtx_POST_INC (SImode
,
25495 stack_pointer_rtx
));
25496 set_mem_alias_set (addr
, get_frame_alias_set ());
25497 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25498 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25499 gen_rtx_REG (V2SImode
, i
),
25501 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25502 stack_pointer_rtx
, stack_pointer_rtx
);
25505 if (saved_regs_mask
)
25508 bool return_in_pc
= false;
/* Decide whether the final pop can load the return address directly
   into PC (not valid for interworked, CMSE-entry, stack-realigned,
   EH-return, or pretend-args functions).  */
25510 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25511 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25512 && !IS_CMSE_ENTRY (func_type
)
25513 && !IS_STACKALIGN (func_type
)
25515 && crtl
->args
.pretend_args_size
== 0
25516 && saved_regs_mask
& (1 << LR_REGNUM
)
25517 && !crtl
->calls_eh_return
)
25519 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25520 saved_regs_mask
|= (1 << PC_REGNUM
);
25521 return_in_pc
= true;
25524 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25526 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25527 if (saved_regs_mask
& (1 << i
))
25529 rtx addr
= gen_rtx_MEM (SImode
,
25530 gen_rtx_POST_INC (SImode
,
25531 stack_pointer_rtx
));
25532 set_mem_alias_set (addr
, get_frame_alias_set ());
25534 if (i
== PC_REGNUM
)
25536 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25537 XVECEXP (insn
, 0, 0) = ret_rtx
;
25538 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25540 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25541 insn
= emit_jump_insn (insn
);
25545 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25547 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25548 gen_rtx_REG (SImode
, i
),
25550 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25552 stack_pointer_rtx
);
/* Multi-register restore: prefer LDRD-based pops on tunings that
   request it, otherwise a single multi-reg pop.  */
25559 && current_tune
->prefer_ldrd_strd
25560 && !optimize_function_for_size_p (cfun
))
25563 thumb2_emit_ldrd_pop (saved_regs_mask
);
25564 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25565 arm_emit_ldrd_pop (saved_regs_mask
);
25567 arm_emit_multi_reg_pop (saved_regs_mask
);
25570 arm_emit_multi_reg_pop (saved_regs_mask
);
/* Deallocate pretend args / static chain area above the saved regs.  */
25578 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25582 rtx dwarf
= NULL_RTX
;
25584 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25586 GEN_INT (amount
)));
25588 RTX_FRAME_RELATED_P (tmp
) = 1;
25590 if (cfun
->machine
->uses_anonymous_args
)
25592 /* Restore pretend args. Refer arm_expand_prologue on how to save
25593 pretend_args in stack. */
25594 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25595 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25596 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25597 if (saved_regs_mask
& (1 << i
))
25599 rtx reg
= gen_rtx_REG (SImode
, i
);
25600 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25603 REG_NOTES (tmp
) = dwarf
;
25605 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25606 stack_pointer_rtx
, stack_pointer_rtx
);
25609 /* Clear all caller-saved regs that are not used to return. */
25610 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25612 /* CMSE_ENTRY always returns. */
25613 gcc_assert (really_return
);
25614 cmse_nonsecure_entry_clear_before_return ();
25617 if (!really_return
)
25620 if (crtl
->calls_eh_return
)
25621 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25623 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25625 if (IS_STACKALIGN (func_type
))
25626 /* Restore the original stack pointer. Before prologue, the stack was
25627 realigned and the original stack pointer saved in r0. For details,
25628 see comment in arm_expand_prologue. */
25629 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25631 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): extraction is missing interior lines (return type, braces,
   the `const char *name` declaration, the expected GET_CODE value).
   Comments describe visible logic only — verify against the full arm.c.  */
25634 /* Implementation of insn prologue_thumb1_interwork. This is the first
25635 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25638 thumb1_output_interwork (void)
25641 FILE *f
= asm_out_file
;
25643 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25644 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25646 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25648 /* Generate code sequence to switch us into Thumb mode. */
25649 /* The .code 32 directive has already been emitted by
25650 ASM_DECLARE_FUNCTION_NAME. */
25651 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25652 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25654 /* Generate a label, so that the debugger will notice the
25655 change in instruction sets. This label is also used by
25656 the assembler to bypass the ARM code when this function
25657 is called from a Thumb encoded function elsewhere in the
25658 same file. Hence the definition of STUB_NAME here must
25659 agree with the definition in gas/config/tc-arm.c. */
25661 #define STUB_NAME ".real_start_of"
25663 fprintf (f
, "\t.code\t16\n");
25665 if (arm_dllexport_name_p (name
))
25666 name
= arm_strip_name_encoding (name
);
25668 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25669 fprintf (f
, "\t.thumb_func\n");
25670 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
/* NOTE(review): extraction drops interior lines (return type, braces,
   declarations of `addr`, `base`, `offset`, `arg1`, `arg2`, the switch case
   labels, `break`s, and the final `return ""`).  Comments describe visible
   logic only — verify against the full arm.c.  */
25675 /* Handle the case of a double word load into a low register from
25676 a computed memory address. The computed address may involve a
25677 register which is overwritten by the load. */
25679 thumb_load_double_from_address (rtx
*operands
)
25687 gcc_assert (REG_P (operands
[0]));
25688 gcc_assert (MEM_P (operands
[1]));
25690 /* Get the memory address. */
25691 addr
= XEXP (operands
[1], 0);
25693 /* Work out how the memory address is computed. */
25694 switch (GET_CODE (addr
))
/* Plain register address: order the two loads so the address register
   is not clobbered before the second load.  */
25697 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25699 if (REGNO (operands
[0]) == REGNO (addr
))
25701 output_asm_insn ("ldr\t%H0, %2", operands
);
25702 output_asm_insn ("ldr\t%0, %1", operands
);
25706 output_asm_insn ("ldr\t%0, %1", operands
);
25707 output_asm_insn ("ldr\t%H0, %2", operands
);
25712 /* Compute <address> + 4 for the high order load. */
25713 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25715 output_asm_insn ("ldr\t%0, %1", operands
);
25716 output_asm_insn ("ldr\t%H0, %2", operands
);
/* PLUS address: split into base and offset operands.  */
25720 arg1
= XEXP (addr
, 0);
25721 arg2
= XEXP (addr
, 1);
25723 if (CONSTANT_P (arg1
))
25724 base
= arg2
, offset
= arg1
;
25726 base
= arg1
, offset
= arg2
;
25728 gcc_assert (REG_P (base
));
25730 /* Catch the case of <address> = <reg> + <reg> */
25731 if (REG_P (offset
))
25733 int reg_offset
= REGNO (offset
);
25734 int reg_base
= REGNO (base
);
25735 int reg_dest
= REGNO (operands
[0]);
25737 /* Add the base and offset registers together into the
25738 higher destination register. */
25739 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25740 reg_dest
+ 1, reg_base
, reg_offset
);
25742 /* Load the lower destination register from the address in
25743 the higher destination register. */
25744 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25745 reg_dest
, reg_dest
+ 1);
25747 /* Load the higher destination register from its own address
25749 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25750 reg_dest
+ 1, reg_dest
+ 1);
25754 /* Compute <address> + 4 for the high order load. */
25755 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25757 /* If the computed address is held in the low order register
25758 then load the high order register first, otherwise always
25759 load the low order register first. */
25760 if (REGNO (operands
[0]) == REGNO (base
))
25762 output_asm_insn ("ldr\t%H0, %2", operands
);
25763 output_asm_insn ("ldr\t%0, %1", operands
);
25767 output_asm_insn ("ldr\t%0, %1", operands
);
25768 output_asm_insn ("ldr\t%H0, %2", operands
);
25774 /* With no registers to worry about we can just load the value
25776 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25778 output_asm_insn ("ldr\t%H0, %2", operands
);
25779 output_asm_insn ("ldr\t%0, %1", operands
);
25783 gcc_unreachable ();
/* NOTE(review): extraction drops interior lines (return type, braces, the
   `switch (n)` dispatch and `case`/`break` labels, the final `return ""`).
   Visible logic: for 2- and 3-register cases, sort the scratch registers
   into ascending order (ldmia/stmia require it — the 3-reg case is a
   3-element bubble sort) and emit a load-multiple/store-multiple pair with
   post-increment of both pointers.  Verify against the full arm.c.  */
25790 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
25795 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25796 std::swap (operands
[4], operands
[5]);
25798 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
25799 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
25803 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25804 std::swap (operands
[4], operands
[5]);
25805 if (REGNO (operands
[5]) > REGNO (operands
[6]))
25806 std::swap (operands
[5], operands
[6]);
25807 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25808 std::swap (operands
[4], operands
[5]);
25810 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
25811 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
25815 gcc_unreachable ();
/* NOTE(review): extraction drops interior lines (return type, braces, the
   `rtx *labelp` declaration, `operands[]` setup before the final
   output_asm_insn).  Comments describe visible logic only — verify against
   the full arm.c.  */
25821 /* Output a call-via instruction for thumb state. */
25823 thumb_call_via_reg (rtx reg
)
25825 int regno
= REGNO (reg
);
25828 gcc_assert (regno
< LR_REGNUM
);
25830 /* If we are in the normal text section we can use a single instance
25831 per compilation unit. If we are doing function sections, then we need
25832 an entry per section, since we can't rely on reachability. */
25833 if (in_section
== text_section
)
25835 thumb_call_reg_needed
= 1;
/* Lazily create the per-file trampoline label for this register.  */
25837 if (thumb_call_via_label
[regno
] == NULL
)
25838 thumb_call_via_label
[regno
] = gen_label_rtx ();
25839 labelp
= thumb_call_via_label
+ regno
;
/* Function-sections case: use a per-function label instead.  */
25843 if (cfun
->machine
->call_via
[regno
] == NULL
)
25844 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
25845 labelp
= cfun
->machine
->call_via
+ regno
;
25848 output_asm_insn ("bl\t%a0", labelp
);
/* NOTE(review): extraction drops interior lines (return type, braces, the
   `while (len >= 12/8/4/2)` loop headers, `len -=`/`offset +=` updates, and
   the tails of the gen_movhi/gen_movqi calls).  Visible logic: copy `len`
   bytes using 12- and 8-byte block-move patterns, then word, halfword and
   byte moves for the remainder.  Verify against the full arm.c.  */
25852 /* Routines for generating rtl. */
25854 thumb_expand_movmemqi (rtx
*operands
)
25856 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
25857 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
25858 HOST_WIDE_INT len
= INTVAL (operands
[2]);
25859 HOST_WIDE_INT offset
= 0;
25863 emit_insn (gen_movmem12b (out
, in
, out
, in
));
25869 emit_insn (gen_movmem8b (out
, in
, out
, in
));
25875 rtx reg
= gen_reg_rtx (SImode
);
25876 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
25877 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
25884 rtx reg
= gen_reg_rtx (HImode
);
25885 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
25886 plus_constant (Pmode
, in
,
25888 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
25897 rtx reg
= gen_reg_rtx (QImode
);
25898 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
25899 plus_constant (Pmode
, in
,
25901 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
/* Reload helper: emit the HImode store via the clobber pattern, using
   operands[2] as the scratch register.  (Return type and braces missing
   from this extraction.)  */
25908 thumb_reload_out_hi (rtx
*operands
)
25910 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
/* NOTE(review): body is almost entirely missing from this extraction — only
   the ARM_NAME_ENCODING_LENGTHS macro expansion survives; presumably a
   switch over `c` built by that macro, returning 0 by default.  Verify
   against the full arm.c.  */
25913 /* Return the length of a function name prefix
25914 that starts with the character 'c'. */
25916 arm_get_strip_length (int c
)
25920 ARM_NAME_ENCODING_LENGTHS
/* NOTE(review): loop body (`name += skip;`) and the return statement are
   missing from this extraction.  Visible logic: repeatedly measure and skip
   prefix encodings via arm_get_strip_length.  Verify against the full
   arm.c.  */
25925 /* Return a pointer to a function's name with any
25926 and all prefix encodings stripped from it. */
25928 arm_strip_name_encoding (const char *name
)
25932 while ((skip
= arm_get_strip_length (* name
)))
/* NOTE(review): declarations of `skip`/`verbatim`, the loop body advancing
   `name`, and the if/else around the two output calls are missing from this
   extraction.  Comments describe visible logic only — verify against the
   full arm.c.  */
25938 /* If there is a '*' anywhere in the name's prefix, then
25939 emit the stripped name verbatim, otherwise prepend an
25940 underscore if leading underscores are being used. */
25942 arm_asm_output_labelref (FILE *stream
, const char *name
)
25947 while ((skip
= arm_get_strip_length (* name
)))
25949 verbatim
|= (*name
== '*');
25954 fputs (name
, stream
);
25956 asm_fprintf (stream
, "%U%s", name
);
/* (Return type and braces missing from this extraction.)  */
25959 /* This function is used to emit an EABI tag and its associated value.
25960 We emit the numerical value of the tag in case the assembler does not
25961 support textual tags. (Eg gas prior to 2.20). If requested we include
25962 the tag name in a comment so that anyone reading the assembler output
25963 will know which tag is being set.
25965 This function is not static because arm-c.c needs it too. */
25968 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
25970 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
/* Only clutter the output with the symbolic tag name when asked.  */
25971 if (flag_verbose_asm
|| flag_debug_asm
)
25972 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
25973 asm_fprintf (asm_out_file
, "\n");
/* Dump the fields of `current_tune` as assembler comments, one per line.
   (Return type and braces missing from this extraction.)  */
25976 /* This function is used to print CPU tuning information as comment
25977 in assembler file. Pointers are not printed for now. */
25980 arm_print_tune_info (void)
25982 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
25983 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
25984 current_tune
->constant_limit
);
25985 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25986 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
25987 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25988 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
25989 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25990 "prefetch.l1_cache_size:\t%d\n",
25991 current_tune
->prefetch
.l1_cache_size
);
25992 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25993 "prefetch.l1_cache_line_size:\t%d\n",
25994 current_tune
->prefetch
.l1_cache_line_size
);
25995 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25996 "prefer_constant_pool:\t%d\n",
25997 (int) current_tune
->prefer_constant_pool
);
25998 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25999 "branch_cost:\t(s:speed, p:predictable)\n");
/* Enumerate branch_cost over the four (speed, predictable) combinations.  */
26000 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
26001 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
26002 current_tune
->branch_cost (false, false));
26003 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
26004 current_tune
->branch_cost (false, true));
26005 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
26006 current_tune
->branch_cost (true, false));
26007 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
26008 current_tune
->branch_cost (true, true));
26009 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26010 "prefer_ldrd_strd:\t%d\n",
26011 (int) current_tune
->prefer_ldrd_strd
);
26012 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26013 "logical_op_non_short_circuit:\t[%d,%d]\n",
26014 (int) current_tune
->logical_op_non_short_circuit_thumb
,
26015 (int) current_tune
->logical_op_non_short_circuit_arm
);
26016 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26017 "prefer_neon_for_64bits:\t%d\n",
26018 (int) current_tune
->prefer_neon_for_64bits
);
26019 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26020 "disparage_flag_setting_t16_encodings:\t%d\n",
26021 (int) current_tune
->disparage_flag_setting_t16_encodings
);
26022 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26023 "string_ops_prefer_neon:\t%d\n",
26024 (int) current_tune
->string_ops_prefer_neon
);
26025 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26026 "max_insns_inline_memset:\t%d\n",
26027 current_tune
->max_insns_inline_memset
);
26028 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
26029 current_tune
->fusible_ops
);
26030 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
26031 (int) current_tune
->sched_autopref
);
/* NOTE(review): extraction drops interior lines (return type, braces,
   `else` keywords, the `buf` declaration, the `val` computation for
   Tag_ABI_optimization_goals, and parts of several conditions).  Comments
   describe visible logic only — verify against the full arm.c.  */
/* Emit per-file assembler directives: .arch/.cpu selection, optional tune
   dump, and the EABI build attributes.  */
26035 arm_file_start (void)
26041 /* We don't have a specified CPU. Use the architecture to
26044 Note: it might be better to do this unconditionally, then the
26045 assembler would not need to know about all new CPU names as
26047 if (!arm_active_target
.core_name
)
26049 /* armv7ve doesn't support any extensions. */
26050 if (strcmp (arm_active_target
.arch_name
, "armv7ve") == 0)
26052 /* Keep backward compatability for assemblers
26053 which don't support armv7ve. */
26054 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26055 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26056 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26057 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26058 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
/* arch names of the form "base+ext": split at '+' and emit the base
   as .arch plus a separate .arch_extension directive.  */
26062 const char* pos
= strchr (arm_active_target
.arch_name
, '+');
26066 gcc_assert (strlen (arm_active_target
.arch_name
)
26067 <= sizeof (buf
) / sizeof (*pos
));
26068 strncpy (buf
, arm_active_target
.arch_name
,
26069 (pos
- arm_active_target
.arch_name
) * sizeof (*pos
));
26070 buf
[pos
- arm_active_target
.arch_name
] = '\0';
26071 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
26072 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
26075 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26076 arm_active_target
.arch_name
);
/* "generic-<arch>" CPUs: emit the architecture part after the dash.  */
26079 else if (strncmp (arm_active_target
.core_name
, "generic", 7) == 0)
26080 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26081 arm_active_target
.core_name
+ 8);
26084 const char* truncated_name
26085 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
26086 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26089 if (print_tune_info
)
26090 arm_print_tune_info ();
26092 if (! TARGET_SOFT_FLOAT
)
26094 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26095 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26097 if (TARGET_HARD_FLOAT_ABI
)
26098 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26101 /* Some of these attributes only apply when the corresponding features
26102 are used. However we don't have any easy way of figuring this out.
26103 Conservatively record the setting that would have been used. */
26105 if (flag_rounding_math
)
26106 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26108 if (!flag_unsafe_math_optimizations
)
26110 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26111 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26113 if (flag_signaling_nans
)
26114 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26116 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26117 flag_finite_math_only
? 1 : 3);
26119 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26120 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26121 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26122 flag_short_enums
? 1 : 2);
26124 /* Tag_ABI_optimization_goals. */
26127 else if (optimize
>= 2)
26133 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26135 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26138 if (arm_fp16_format
)
26139 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26140 (int) arm_fp16_format
);
26142 if (arm_lang_output_object_attributes_hook
)
26143 arm_lang_output_object_attributes_hook();
26146 default_file_start ();
/* NOTE(review): extraction drops interior lines (return type, braces, the
   `int regno;` declaration, early `return`, and the NULL-label skip inside
   the loop).  Visible logic: optionally mark the stack non-executable, then
   emit the deferred thumb call-via-register trampolines (`bx rN` behind
   each recorded label).  Verify against the full arm.c.  */
26150 arm_file_end (void)
26154 if (NEED_INDICATE_EXEC_STACK
)
26155 /* Add .note.GNU-stack. */
26156 file_end_indicate_exec_stack ();
26158 if (! thumb_call_reg_needed
)
26161 switch_to_section (text_section
);
26162 asm_fprintf (asm_out_file
, "\t.code 16\n");
26163 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26165 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26167 rtx label
= thumb_call_via_label
[regno
];
26171 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26172 CODE_LABEL_NUMBER (label
));
26173 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
/* (Return type, braces, and the enclosing `#ifndef ARM_PE` opening are
   missing from this extraction; the matching #endif is visible below.)  */
26179 /* Symbols in the text segment can be accessed without indirecting via the
26180 constant pool; it may take an extra binary operation, but this is still
26181 faster than indirecting via memory. Don't do this when not optimizing,
26182 since we won't be calculating al of the offsets necessary to do this
26186 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
/* Flag constant decls so they can be addressed pc-relative.  */
26188 if (optimize
> 0 && TREE_CONSTANT (decl
))
26189 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26191 default_encode_section_info (decl
, rtl
, first
);
26193 #endif /* !ARM_PE */
/* Reset the conditional-execution state machine when the label that the
   current IT/ccfsm sequence was branching around is emitted, then fall
   through to the default label printer.  (Return type and braces missing
   from this extraction.)  */
26196 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26198 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26199 && !strcmp (prefix
, "L"))
26201 arm_ccfsm_state
= 0;
26202 arm_target_insn
= NULL
;
26204 default_internal_label (stream
, prefix
, labelno
);
26207 /* Output code to add DELTA to the first argument, and then jump
26208 to FUNCTION. Used for C++ multiple inheritance. */
26211 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26212 HOST_WIDE_INT
, tree function
)
26214 static int thunk_label
= 0;
26217 int mi_delta
= delta
;
26218 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26220 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26223 mi_delta
= - mi_delta
;
26225 final_start_function (emit_barrier (), file
, 1);
26229 int labelno
= thunk_label
++;
26230 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26231 /* Thunks are entered in arm mode when available. */
26232 if (TARGET_THUMB1_ONLY
)
26234 /* push r3 so we can use it as a temporary. */
26235 /* TODO: Omit this save if r3 is not used. */
26236 fputs ("\tpush {r3}\n", file
);
26237 fputs ("\tldr\tr3, ", file
);
26241 fputs ("\tldr\tr12, ", file
);
26243 assemble_name (file
, label
);
26244 fputc ('\n', file
);
26247 /* If we are generating PIC, the ldr instruction below loads
26248 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26249 the address of the add + 8, so we have:
26251 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26254 Note that we have "+ 1" because some versions of GNU ld
26255 don't set the low bit of the result for R_ARM_REL32
26256 relocations against thumb function symbols.
26257 On ARMv6M this is +4, not +8. */
26258 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26259 assemble_name (file
, labelpc
);
26260 fputs (":\n", file
);
26261 if (TARGET_THUMB1_ONLY
)
26263 /* This is 2 insns after the start of the thunk, so we know it
26264 is 4-byte aligned. */
26265 fputs ("\tadd\tr3, pc, r3\n", file
);
26266 fputs ("\tmov r12, r3\n", file
);
26269 fputs ("\tadd\tr12, pc, r12\n", file
);
26271 else if (TARGET_THUMB1_ONLY
)
26272 fputs ("\tmov r12, r3\n", file
);
26274 if (TARGET_THUMB1_ONLY
)
26276 if (mi_delta
> 255)
26278 fputs ("\tldr\tr3, ", file
);
26279 assemble_name (file
, label
);
26280 fputs ("+4\n", file
);
26281 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26282 mi_op
, this_regno
, this_regno
);
26284 else if (mi_delta
!= 0)
26286 /* Thumb1 unified syntax requires s suffix in instruction name when
26287 one of the operands is immediate. */
26288 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26289 mi_op
, this_regno
, this_regno
,
26295 /* TODO: Use movw/movt for large constants when available. */
26296 while (mi_delta
!= 0)
26298 if ((mi_delta
& (3 << shift
)) == 0)
26302 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26303 mi_op
, this_regno
, this_regno
,
26304 mi_delta
& (0xff << shift
));
26305 mi_delta
&= ~(0xff << shift
);
26312 if (TARGET_THUMB1_ONLY
)
26313 fputs ("\tpop\t{r3}\n", file
);
26315 fprintf (file
, "\tbx\tr12\n");
26316 ASM_OUTPUT_ALIGN (file
, 2);
26317 assemble_name (file
, label
);
26318 fputs (":\n", file
);
26321 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26322 rtx tem
= XEXP (DECL_RTL (function
), 0);
26323 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26324 pipeline offset is four rather than eight. Adjust the offset
26326 tem
= plus_constant (GET_MODE (tem
), tem
,
26327 TARGET_THUMB1_ONLY
? -3 : -7);
26328 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26330 gen_rtx_SYMBOL_REF (Pmode
,
26331 ggc_strdup (labelpc
)));
26332 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26335 /* Output ".word .LTHUNKn". */
26336 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26338 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26339 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26343 fputs ("\tb\t", file
);
26344 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26345 if (NEED_PLT_RELOC
)
26346 fputs ("(PLT)", file
);
26347 fputc ('\n', file
);
26350 final_end_function ();
26353 /* MI thunk handling for TARGET_32BIT. */
26356 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26357 HOST_WIDE_INT vcall_offset
, tree function
)
26359 /* On ARM, this_regno is R0 or R1 depending on
26360 whether the function returns an aggregate or not.
26362 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26364 ? R1_REGNUM
: R0_REGNUM
);
26366 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26367 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26368 reload_completed
= 1;
26369 emit_note (NOTE_INSN_PROLOGUE_END
);
26371 /* Add DELTA to THIS_RTX. */
26373 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26374 delta
, this_rtx
, this_rtx
, false);
26376 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26377 if (vcall_offset
!= 0)
26379 /* Load *THIS_RTX. */
26380 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26381 /* Compute *THIS_RTX + VCALL_OFFSET. */
26382 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26384 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26385 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26386 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26389 /* Generate a tail call to the target function. */
26390 if (!TREE_USED (function
))
26392 assemble_external (function
);
26393 TREE_USED (function
) = 1;
26395 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26396 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26397 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26398 SIBLING_CALL_P (insn
) = 1;
26400 insn
= get_insns ();
26401 shorten_branches (insn
);
26402 final_start_function (insn
, file
, 1);
26403 final (insn
, file
, 1);
26404 final_end_function ();
26406 /* Stop pretending this is a post-reload pass. */
26407 reload_completed
= 0;
26410 /* Output code to add DELTA to the first argument, and then jump
26411 to FUNCTION. Used for C++ multiple inheritance. */
26414 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
26415 HOST_WIDE_INT vcall_offset
, tree function
)
26418 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26420 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26424 arm_emit_vector_const (FILE *file
, rtx x
)
26427 const char * pattern
;
26429 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26431 switch (GET_MODE (x
))
26433 case V2SImode
: pattern
= "%08x"; break;
26434 case V4HImode
: pattern
= "%04x"; break;
26435 case V8QImode
: pattern
= "%02x"; break;
26436 default: gcc_unreachable ();
26439 fprintf (file
, "0x");
26440 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26444 element
= CONST_VECTOR_ELT (x
, i
);
26445 fprintf (file
, pattern
, INTVAL (element
));
26451 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26452 HFmode constant pool entries are actually loaded with ldr. */
26454 arm_emit_fp16_const (rtx c
)
26458 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
26459 if (WORDS_BIG_ENDIAN
)
26460 assemble_zeros (2);
26461 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
26462 if (!WORDS_BIG_ENDIAN
)
26463 assemble_zeros (2);
26467 arm_output_load_gr (rtx
*operands
)
26474 if (!MEM_P (operands
[1])
26475 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26476 || !REG_P (reg
= XEXP (sum
, 0))
26477 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26478 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26479 return "wldrw%?\t%0, %1";
26481 /* Fix up an out-of-range load of a GR register. */
26482 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26483 wcgr
= operands
[0];
26485 output_asm_insn ("ldr%?\t%0, %1", operands
);
26487 operands
[0] = wcgr
;
26489 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26490 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26495 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26497 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26498 named arg and all anonymous args onto the stack.
26499 XXX I know the prologue shouldn't be pushing registers, but it is faster
26503 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26507 int second_time ATTRIBUTE_UNUSED
)
26509 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26512 cfun
->machine
->uses_anonymous_args
= 1;
26513 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26515 nregs
= pcum
->aapcs_ncrn
;
26516 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
26520 nregs
= pcum
->nregs
;
26522 if (nregs
< NUM_ARG_REGS
)
26523 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26526 /* We can't rely on the caller doing the proper promotion when
26527 using APCS or ATPCS. */
26530 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26532 return !TARGET_AAPCS_BASED
;
26535 static machine_mode
26536 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26538 int *punsignedp ATTRIBUTE_UNUSED
,
26539 const_tree fntype ATTRIBUTE_UNUSED
,
26540 int for_return ATTRIBUTE_UNUSED
)
26542 if (GET_MODE_CLASS (mode
) == MODE_INT
26543 && GET_MODE_SIZE (mode
) < 4)
26549 /* AAPCS based ABIs use short enums by default. */
26552 arm_default_short_enums (void)
26554 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
26558 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26561 arm_align_anon_bitfield (void)
26563 return TARGET_AAPCS_BASED
;
26567 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26570 arm_cxx_guard_type (void)
26572 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26576 /* The EABI says test the least significant bit of a guard variable. */
26579 arm_cxx_guard_mask_bit (void)
26581 return TARGET_AAPCS_BASED
;
26585 /* The EABI specifies that all array cookies are 8 bytes long. */
26588 arm_get_cookie_size (tree type
)
26592 if (!TARGET_AAPCS_BASED
)
26593 return default_cxx_get_cookie_size (type
);
26595 size
= build_int_cst (sizetype
, 8);
26600 /* The EABI says that array cookies should also contain the element size. */
26603 arm_cookie_has_size (void)
26605 return TARGET_AAPCS_BASED
;
26609 /* The EABI says constructors and destructors should return a pointer to
26610 the object constructed/destroyed. */
26613 arm_cxx_cdtor_returns_this (void)
26615 return TARGET_AAPCS_BASED
;
26618 /* The EABI says that an inline function may never be the key
26622 arm_cxx_key_method_may_be_inline (void)
26624 return !TARGET_AAPCS_BASED
;
26628 arm_cxx_determine_class_data_visibility (tree decl
)
26630 if (!TARGET_AAPCS_BASED
26631 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26634 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26635 is exported. However, on systems without dynamic vague linkage,
26636 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26637 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26638 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26640 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26641 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26645 arm_cxx_class_data_always_comdat (void)
26647 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26648 vague linkage if the class has no key function. */
26649 return !TARGET_AAPCS_BASED
;
26653 /* The EABI says __aeabi_atexit should be used to register static
26657 arm_cxx_use_aeabi_atexit (void)
26659 return TARGET_AAPCS_BASED
;
26664 arm_set_return_address (rtx source
, rtx scratch
)
26666 arm_stack_offsets
*offsets
;
26667 HOST_WIDE_INT delta
;
26669 unsigned long saved_regs
;
26671 offsets
= arm_get_frame_offsets ();
26672 saved_regs
= offsets
->saved_regs_mask
;
26674 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26675 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26678 if (frame_pointer_needed
)
26679 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26682 /* LR will be the first saved register. */
26683 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26688 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26689 GEN_INT (delta
& ~4095)));
26694 addr
= stack_pointer_rtx
;
26696 addr
= plus_constant (Pmode
, addr
, delta
);
26698 /* The store needs to be marked as frame related in order to prevent
26699 DSE from deleting it as dead if it is based on fp. */
26700 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26701 RTX_FRAME_RELATED_P (insn
) = 1;
26702 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26708 thumb_set_return_address (rtx source
, rtx scratch
)
26710 arm_stack_offsets
*offsets
;
26711 HOST_WIDE_INT delta
;
26712 HOST_WIDE_INT limit
;
26715 unsigned long mask
;
26719 offsets
= arm_get_frame_offsets ();
26720 mask
= offsets
->saved_regs_mask
;
26721 if (mask
& (1 << LR_REGNUM
))
26724 /* Find the saved regs. */
26725 if (frame_pointer_needed
)
26727 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26728 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26734 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26737 /* Allow for the stack frame. */
26738 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26740 /* The link register is always the first saved register. */
26743 /* Construct the address. */
26744 addr
= gen_rtx_REG (SImode
, reg
);
26747 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26748 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26752 addr
= plus_constant (Pmode
, addr
, delta
);
26754 /* The store needs to be marked as frame related in order to prevent
26755 DSE from deleting it as dead if it is based on fp. */
26756 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26757 RTX_FRAME_RELATED_P (insn
) = 1;
26758 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26761 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26764 /* Implements target hook vector_mode_supported_p. */
26766 arm_vector_mode_supported_p (machine_mode mode
)
26768 /* Neon also supports V2SImode, etc. listed in the clause below. */
26769 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26770 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
26771 || mode
== V2DImode
|| mode
== V8HFmode
))
26774 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26775 && ((mode
== V2SImode
)
26776 || (mode
== V4HImode
)
26777 || (mode
== V8QImode
)))
26780 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26781 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26782 || mode
== V2HAmode
))
26788 /* Implements target hook array_mode_supported_p. */
26791 arm_array_mode_supported_p (machine_mode mode
,
26792 unsigned HOST_WIDE_INT nelems
)
26795 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26796 && (nelems
>= 2 && nelems
<= 4))
26802 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26803 registers when autovectorizing for Neon, at least until multiple vector
26804 widths are supported properly by the middle-end. */
26806 static machine_mode
26807 arm_preferred_simd_mode (machine_mode mode
)
26813 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26815 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26817 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26819 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26821 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26828 if (TARGET_REALLY_IWMMXT
)
26844 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26846 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26847 using r0-r4 for function arguments, r7 for the stack frame and don't have
26848 enough left over to do doubleword arithmetic. For Thumb-2 all the
26849 potentially problematic instructions accept high registers so this is not
26850 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26851 that require many low registers. */
26853 arm_class_likely_spilled_p (reg_class_t rclass
)
26855 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
26856 || rclass
== CC_REG
)
26862 /* Implements target hook small_register_classes_for_mode_p. */
26864 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
26866 return TARGET_THUMB1
;
26869 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26870 ARM insns and therefore guarantee that the shift count is modulo 256.
26871 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26872 guarantee no particular behavior for out-of-range counts. */
26874 static unsigned HOST_WIDE_INT
26875 arm_shift_truncation_mask (machine_mode mode
)
26877 return mode
== SImode
? 255 : 0;
26881 /* Map internal gcc register numbers to DWARF2 register numbers. */
26884 arm_dbx_register_number (unsigned int regno
)
26889 if (IS_VFP_REGNUM (regno
))
26891 /* See comment in arm_dwarf_register_span. */
26892 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26893 return 64 + regno
- FIRST_VFP_REGNUM
;
26895 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
26898 if (IS_IWMMXT_GR_REGNUM (regno
))
26899 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
26901 if (IS_IWMMXT_REGNUM (regno
))
26902 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
26904 return DWARF_FRAME_REGISTERS
;
26907 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26908 GCC models tham as 64 32-bit registers, so we need to describe this to
26909 the DWARF generation code. Other registers can use the default. */
26911 arm_dwarf_register_span (rtx rtl
)
26919 regno
= REGNO (rtl
);
26920 if (!IS_VFP_REGNUM (regno
))
26923 /* XXX FIXME: The EABI defines two VFP register ranges:
26924 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26926 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26927 corresponding D register. Until GDB supports this, we shall use the
26928 legacy encodings. We also use these encodings for D0-D15 for
26929 compatibility with older debuggers. */
26930 mode
= GET_MODE (rtl
);
26931 if (GET_MODE_SIZE (mode
) < 8)
26934 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26936 nregs
= GET_MODE_SIZE (mode
) / 4;
26937 for (i
= 0; i
< nregs
; i
+= 2)
26938 if (TARGET_BIG_END
)
26940 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26941 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
26945 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
26946 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26951 nregs
= GET_MODE_SIZE (mode
) / 8;
26952 for (i
= 0; i
< nregs
; i
++)
26953 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
26956 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
26959 #if ARM_UNWIND_INFO
26960 /* Emit unwind directives for a store-multiple instruction or stack pointer
26961 push during alignment.
26962 These should only ever be generated by the function prologue code, so
26963 expect them to have a particular form.
26964 The store-multiple instruction sometimes pushes pc as the last register,
26965 although it should not be tracked into unwind information, or for -Os
26966 sometimes pushes some dummy registers before first register that needs
26967 to be tracked in unwind information; such dummy registers are there just
26968 to avoid separate stack adjustment, and will not be restored in the
26972 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
26975 HOST_WIDE_INT offset
;
26976 HOST_WIDE_INT nregs
;
26980 unsigned padfirst
= 0, padlast
= 0;
26983 e
= XVECEXP (p
, 0, 0);
26984 gcc_assert (GET_CODE (e
) == SET
);
26986 /* First insn will adjust the stack pointer. */
26987 gcc_assert (GET_CODE (e
) == SET
26988 && REG_P (SET_DEST (e
))
26989 && REGNO (SET_DEST (e
)) == SP_REGNUM
26990 && GET_CODE (SET_SRC (e
)) == PLUS
);
26992 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
26993 nregs
= XVECLEN (p
, 0) - 1;
26994 gcc_assert (nregs
);
26996 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
26999 /* For -Os dummy registers can be pushed at the beginning to
27000 avoid separate stack pointer adjustment. */
27001 e
= XVECEXP (p
, 0, 1);
27002 e
= XEXP (SET_DEST (e
), 0);
27003 if (GET_CODE (e
) == PLUS
)
27004 padfirst
= INTVAL (XEXP (e
, 1));
27005 gcc_assert (padfirst
== 0 || optimize_size
);
27006 /* The function prologue may also push pc, but not annotate it as it is
27007 never restored. We turn this into a stack pointer adjustment. */
27008 e
= XVECEXP (p
, 0, nregs
);
27009 e
= XEXP (SET_DEST (e
), 0);
27010 if (GET_CODE (e
) == PLUS
)
27011 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27013 padlast
= offset
- 4;
27014 gcc_assert (padlast
== 0 || padlast
== 4);
27016 fprintf (asm_out_file
, "\t.pad #4\n");
27018 fprintf (asm_out_file
, "\t.save {");
27020 else if (IS_VFP_REGNUM (reg
))
27023 fprintf (asm_out_file
, "\t.vsave {");
27026 /* Unknown register type. */
27027 gcc_unreachable ();
27029 /* If the stack increment doesn't match the size of the saved registers,
27030 something has gone horribly wrong. */
27031 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27035 /* The remaining insns will describe the stores. */
27036 for (i
= 1; i
<= nregs
; i
++)
27038 /* Expect (set (mem <addr>) (reg)).
27039 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27040 e
= XVECEXP (p
, 0, i
);
27041 gcc_assert (GET_CODE (e
) == SET
27042 && MEM_P (SET_DEST (e
))
27043 && REG_P (SET_SRC (e
)));
27045 reg
= REGNO (SET_SRC (e
));
27046 gcc_assert (reg
>= lastreg
);
27049 fprintf (asm_out_file
, ", ");
27050 /* We can't use %r for vfp because we need to use the
27051 double precision register names. */
27052 if (IS_VFP_REGNUM (reg
))
27053 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27055 asm_fprintf (asm_out_file
, "%r", reg
);
27059 /* Check that the addresses are consecutive. */
27060 e
= XEXP (SET_DEST (e
), 0);
27061 if (GET_CODE (e
) == PLUS
)
27062 gcc_assert (REG_P (XEXP (e
, 0))
27063 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27064 && CONST_INT_P (XEXP (e
, 1))
27065 && offset
== INTVAL (XEXP (e
, 1)));
27069 && REGNO (e
) == SP_REGNUM
);
27070 offset
+= reg_size
;
27073 fprintf (asm_out_file
, "}\n");
27075 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
27078 /* Emit unwind directives for a SET. */
27081 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27089 switch (GET_CODE (e0
))
27092 /* Pushing a single register. */
27093 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27094 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27095 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27098 asm_fprintf (asm_out_file
, "\t.save ");
27099 if (IS_VFP_REGNUM (REGNO (e1
)))
27100 asm_fprintf(asm_out_file
, "{d%d}\n",
27101 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27103 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27107 if (REGNO (e0
) == SP_REGNUM
)
27109 /* A stack increment. */
27110 if (GET_CODE (e1
) != PLUS
27111 || !REG_P (XEXP (e1
, 0))
27112 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27113 || !CONST_INT_P (XEXP (e1
, 1)))
27116 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27117 -INTVAL (XEXP (e1
, 1)));
27119 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27121 HOST_WIDE_INT offset
;
27123 if (GET_CODE (e1
) == PLUS
)
27125 if (!REG_P (XEXP (e1
, 0))
27126 || !CONST_INT_P (XEXP (e1
, 1)))
27128 reg
= REGNO (XEXP (e1
, 0));
27129 offset
= INTVAL (XEXP (e1
, 1));
27130 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27131 HARD_FRAME_POINTER_REGNUM
, reg
,
27134 else if (REG_P (e1
))
27137 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27138 HARD_FRAME_POINTER_REGNUM
, reg
);
27143 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27145 /* Move from sp to reg. */
27146 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27148 else if (GET_CODE (e1
) == PLUS
27149 && REG_P (XEXP (e1
, 0))
27150 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27151 && CONST_INT_P (XEXP (e1
, 1)))
27153 /* Set reg to offset from sp. */
27154 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27155 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27167 /* Emit unwind directives for the given insn. */
27170 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27173 bool handled_one
= false;
27175 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27178 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27179 && (TREE_NOTHROW (current_function_decl
)
27180 || crtl
->all_throwers_are_sibcalls
))
27183 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27186 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27188 switch (REG_NOTE_KIND (note
))
27190 case REG_FRAME_RELATED_EXPR
:
27191 pat
= XEXP (note
, 0);
27194 case REG_CFA_REGISTER
:
27195 pat
= XEXP (note
, 0);
27198 pat
= PATTERN (insn
);
27199 if (GET_CODE (pat
) == PARALLEL
)
27200 pat
= XVECEXP (pat
, 0, 0);
27203 /* Only emitted for IS_STACKALIGN re-alignment. */
27208 src
= SET_SRC (pat
);
27209 dest
= SET_DEST (pat
);
27211 gcc_assert (src
== stack_pointer_rtx
);
27212 reg
= REGNO (dest
);
27213 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27216 handled_one
= true;
27219 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27220 to get correct dwarf information for shrink-wrap. We should not
27221 emit unwind information for it because these are used either for
27222 pretend arguments or notes to adjust sp and restore registers from
27224 case REG_CFA_DEF_CFA
:
27225 case REG_CFA_ADJUST_CFA
:
27226 case REG_CFA_RESTORE
:
27229 case REG_CFA_EXPRESSION
:
27230 case REG_CFA_OFFSET
:
27231 /* ??? Only handling here what we actually emit. */
27232 gcc_unreachable ();
27240 pat
= PATTERN (insn
);
27243 switch (GET_CODE (pat
))
27246 arm_unwind_emit_set (asm_out_file
, pat
);
27250 /* Store multiple. */
27251 arm_unwind_emit_sequence (asm_out_file
, pat
);
27260 /* Output a reference from a function exception table to the type_info
27261 object X. The EABI specifies that the symbol should be relocated by
27262 an R_ARM_TARGET2 relocation. */
27265 arm_output_ttype (rtx x
)
27267 fputs ("\t.word\t", asm_out_file
);
27268 output_addr_const (asm_out_file
, x
);
27269 /* Use special relocations for symbol references. */
27270 if (!CONST_INT_P (x
))
27271 fputs ("(TARGET2)", asm_out_file
);
27272 fputc ('\n', asm_out_file
);
27277 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27280 arm_asm_emit_except_personality (rtx personality
)
27282 fputs ("\t.personality\t", asm_out_file
);
27283 output_addr_const (asm_out_file
, personality
);
27284 fputc ('\n', asm_out_file
);
27286 #endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
27304 /* Output unwind directives for the start/end of a function. */
27307 arm_output_fn_unwind (FILE * f
, bool prologue
)
27309 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27313 fputs ("\t.fnstart\n", f
);
27316 /* If this function will never be unwound, then mark it as such.
27317 The came condition is used in arm_unwind_emit to suppress
27318 the frame annotations. */
27319 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27320 && (TREE_NOTHROW (current_function_decl
)
27321 || crtl
->all_throwers_are_sibcalls
))
27322 fputs("\t.cantunwind\n", f
);
27324 fputs ("\t.fnend\n", f
);
27329 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27331 enum tls_reloc reloc
;
27334 val
= XVECEXP (x
, 0, 0);
27335 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27337 output_addr_const (fp
, val
);
27342 fputs ("(tlsgd)", fp
);
27345 fputs ("(tlsldm)", fp
);
27348 fputs ("(tlsldo)", fp
);
27351 fputs ("(gottpoff)", fp
);
27354 fputs ("(tpoff)", fp
);
27357 fputs ("(tlsdesc)", fp
);
27360 gcc_unreachable ();
27369 fputs (" + (. - ", fp
);
27370 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27371 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27372 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27373 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27383 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27386 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27388 gcc_assert (size
== 4);
27389 fputs ("\t.word\t", file
);
27390 output_addr_const (file
, x
);
27391 fputs ("(tlsldo)", file
);
27394 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27397 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27399 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27400 return arm_emit_tls_decoration (fp
, x
);
27401 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27404 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27406 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27407 assemble_name_raw (fp
, label
);
27411 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27413 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27417 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27421 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27423 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27427 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27431 else if (GET_CODE (x
) == CONST_VECTOR
)
27432 return arm_emit_vector_const (fp
, x
);
27437 /* Output assembly for a shift instruction.
27438 SET_FLAGS determines how the instruction modifies the condition codes.
27439 0 - Do not set condition codes.
27440 1 - Set condition codes.
27441 2 - Use smallest instruction. */
27443 arm_output_shift(rtx
* operands
, int set_flags
)
27446 static const char flag_chars
[3] = {'?', '.', '!'};
27451 c
= flag_chars
[set_flags
];
27452 shift
= shift_op(operands
[3], &val
);
27456 operands
[2] = GEN_INT(val
);
27457 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27460 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27462 output_asm_insn (pattern
, operands
);
27466 /* Output assembly for a WMMX immediate shift instruction. */
27468 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27470 int shift
= INTVAL (operands
[2]);
27472 machine_mode opmode
= GET_MODE (operands
[0]);
27474 gcc_assert (shift
>= 0);
27476 /* If the shift value in the register versions is > 63 (for D qualifier),
27477 31 (for W qualifier) or 15 (for H qualifier). */
27478 if (((opmode
== V4HImode
) && (shift
> 15))
27479 || ((opmode
== V2SImode
) && (shift
> 31))
27480 || ((opmode
== DImode
) && (shift
> 63)))
27484 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27485 output_asm_insn (templ
, operands
);
27486 if (opmode
== DImode
)
27488 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27489 output_asm_insn (templ
, operands
);
27494 /* The destination register will contain all zeros. */
27495 sprintf (templ
, "wzero\t%%0");
27496 output_asm_insn (templ
, operands
);
27501 if ((opmode
== DImode
) && (shift
> 32))
27503 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27504 output_asm_insn (templ
, operands
);
27505 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27506 output_asm_insn (templ
, operands
);
27510 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27511 output_asm_insn (templ
, operands
);
27516 /* Output assembly for a WMMX tinsr instruction. */
27518 arm_output_iwmmxt_tinsr (rtx
*operands
)
27520 int mask
= INTVAL (operands
[3]);
27523 int units
= mode_nunits
[GET_MODE (operands
[0])];
27524 gcc_assert ((mask
& (mask
- 1)) == 0);
27525 for (i
= 0; i
< units
; ++i
)
27527 if ((mask
& 0x01) == 1)
27533 gcc_assert (i
< units
);
27535 switch (GET_MODE (operands
[0]))
27538 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27541 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27544 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27547 gcc_unreachable ();
27550 output_asm_insn (templ
, operands
);
27555 /* Output a Thumb-1 casesi dispatch sequence. */
27557 thumb1_output_casesi (rtx
*operands
)
27559 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27561 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27563 switch (GET_MODE(diff_vec
))
27566 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27567 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27569 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27570 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27572 return "bl\t%___gnu_thumb1_case_si";
27574 gcc_unreachable ();
27578 /* Output a Thumb-2 casesi instruction. */
27580 thumb2_output_casesi (rtx
*operands
)
27582 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27584 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27586 output_asm_insn ("cmp\t%0, %1", operands
);
27587 output_asm_insn ("bhi\t%l3", operands
);
27588 switch (GET_MODE(diff_vec
))
27591 return "tbb\t[%|pc, %0]";
27593 return "tbh\t[%|pc, %0, lsl #1]";
27597 output_asm_insn ("adr\t%4, %l2", operands
);
27598 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27599 output_asm_insn ("add\t%4, %4, %5", operands
);
27604 output_asm_insn ("adr\t%4, %l2", operands
);
27605 return "ldr\t%|pc, [%4, %0, lsl #2]";
27608 gcc_unreachable ();
27612 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27613 per-core tuning structs. */
27615 arm_issue_rate (void)
27617 return current_tune
->issue_rate
;
27620 /* Return how many instructions should scheduler lookahead to choose the
27623 arm_first_cycle_multipass_dfa_lookahead (void)
27625 int issue_rate
= arm_issue_rate ();
27627 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27630 /* Enable modeling of L2 auto-prefetcher. */
27632 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27634 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27638 arm_mangle_type (const_tree type
)
27640 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27641 has to be managled as if it is in the "std" namespace. */
27642 if (TARGET_AAPCS_BASED
27643 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27644 return "St9__va_list";
27646 /* Half-precision float. */
27647 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27650 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27652 if (TYPE_NAME (type
) != NULL
)
27653 return arm_mangle_builtin_type (type
);
27655 /* Use the default mangling. */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};
27671 /* Adjust register allocation order when compiling for Thumb. */
27674 arm_order_regs_for_local_alloc (void)
27676 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27677 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27679 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27680 sizeof (thumb_core_reg_alloc_order
));
27683 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27686 arm_frame_pointer_required (void)
27688 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27691 /* If the function receives nonlocal gotos, it needs to save the frame
27692 pointer in the nonlocal_goto_save_area object. */
27693 if (cfun
->has_nonlocal_label
)
27696 /* The frame pointer is required for non-leaf APCS frames. */
27697 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
27700 /* If we are probing the stack in the prologue, we will have a faulting
27701 instruction prior to the stack adjustment and this requires a frame
27702 pointer if we want to catch the exception using the EABI unwinder. */
27703 if (!IS_INTERRUPT (arm_current_func_type ())
27704 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27705 && arm_except_unwind_info (&global_options
) == UI_TARGET
27706 && cfun
->can_throw_non_call_exceptions
)
27708 HOST_WIDE_INT size
= get_frame_size ();
27710 /* That's irrelevant if there is no stack adjustment. */
27714 /* That's relevant only if there is a stack probe. */
27715 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27717 /* We don't have the final size of the frame so adjust. */
27718 size
+= 32 * UNITS_PER_WORD
;
27719 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
27729 /* Only thumb1 can't support conditional execution, so return true if
27730 the target is not thumb1. */
27732 arm_have_conditional_execution (void)
27734 return !TARGET_THUMB1
;
27737 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27738 static HOST_WIDE_INT
27739 arm_vector_alignment (const_tree type
)
27741 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27743 if (TARGET_AAPCS_BASED
)
27744 align
= MIN (align
, 64);
27749 static unsigned int
27750 arm_autovectorize_vector_sizes (void)
27752 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27756 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27758 /* Vectors which aren't in packed structures will not be less aligned than
27759 the natural alignment of their element type, so this is safe. */
27760 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27763 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27767 arm_builtin_support_vector_misalignment (machine_mode mode
,
27768 const_tree type
, int misalignment
,
27771 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27773 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27778 /* If the misalignment is unknown, we should be able to handle the access
27779 so long as it is not to a member of a packed data structure. */
27780 if (misalignment
== -1)
27783 /* Return true if the misalignment is a multiple of the natural alignment
27784 of the vector's element type. This is probably always going to be
27785 true in practice, since we've already established that this isn't a
27787 return ((misalignment
% align
) == 0);
27790 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27795 arm_conditional_register_usage (void)
27799 if (TARGET_THUMB1
&& optimize_size
)
27801 /* When optimizing for size on Thumb-1, it's better not
27802 to use the HI regs, because of the overhead of
27804 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27805 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27808 /* The link register can be clobbered by any branch insn,
27809 but we have no way to track that at present, so mark
27810 it as unavailable. */
27812 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27814 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
27816 /* VFPv3 registers are disabled when earlier VFP
27817 versions are selected due to the definition of
27818 LAST_VFP_REGNUM. */
27819 for (regno
= FIRST_VFP_REGNUM
;
27820 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27822 fixed_regs
[regno
] = 0;
27823 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27824 || regno
>= FIRST_VFP_REGNUM
+ 32;
27828 if (TARGET_REALLY_IWMMXT
)
27830 regno
= FIRST_IWMMXT_GR_REGNUM
;
27831 /* The 2002/10/09 revision of the XScale ABI has wCG0
27832 and wCG1 as call-preserved registers. The 2002/11/21
27833 revision changed this so that all wCG registers are
27834 scratch registers. */
27835 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27836 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27837 fixed_regs
[regno
] = 0;
27838 /* The XScale ABI has wR0 - wR9 as scratch registers,
27839 the rest as call-preserved registers. */
27840 for (regno
= FIRST_IWMMXT_REGNUM
;
27841 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27843 fixed_regs
[regno
] = 0;
27844 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27848 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27850 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27851 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27853 else if (TARGET_APCS_STACK
)
27855 fixed_regs
[10] = 1;
27856 call_used_regs
[10] = 1;
27858 /* -mcaller-super-interworking reserves r11 for calls to
27859 _interwork_r11_call_via_rN(). Making the register global
27860 is an easy way of ensuring that it remains valid for all
27862 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27863 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27865 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27866 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27867 if (TARGET_CALLER_INTERWORKING
)
27868 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27870 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27874 arm_preferred_rename_class (reg_class_t rclass
)
27876 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27877 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27878 and code size can be reduced. */
27879 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27885 /* Compute the attribute "length" of insn "*push_multi".
27886 So this function MUST be kept in sync with that insn pattern. */
27888 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
27890 int i
, regno
, hi_reg
;
27891 int num_saves
= XVECLEN (parallel_op
, 0);
27901 regno
= REGNO (first_op
);
27902 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
27903 list is 8-bit. Normally this means all registers in the list must be
27904 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
27905 encodings. There is one exception for PUSH that LR in HI_REGS can be used
27906 with 16-bit encoding. */
27907 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27908 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
27910 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
27911 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27919 /* Compute the attribute "length" of insn. Currently, this function is used
27920 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27921 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27922 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
27923 true if OPERANDS contains insn which explicit updates base register. */
27926 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
27935 rtx parallel_op
= operands
[0];
27936 /* Initialize to elements number of PARALLEL. */
27937 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
27938 /* Initialize the value to base register. */
27939 unsigned regno
= REGNO (operands
[1]);
27940 /* Skip return and write back pattern.
27941 We only need register pop pattern for later analysis. */
27942 unsigned first_indx
= 0;
27943 first_indx
+= return_pc
? 1 : 0;
27944 first_indx
+= write_back_p
? 1 : 0;
27946 /* A pop operation can be done through LDM or POP. If the base register is SP
27947 and if it's with write back, then a LDM will be alias of POP. */
27948 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
27949 bool ldm_p
= !pop_p
;
27951 /* Check base register for LDM. */
27952 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
27955 /* Check each register in the list. */
27956 for (; indx
>= first_indx
; indx
--)
27958 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
27959 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27960 comment in arm_attr_length_push_multi. */
27961 if (REGNO_REG_CLASS (regno
) == HI_REGS
27962 && (regno
!= PC_REGNUM
|| ldm_p
))
27969 /* Compute the number of instructions emitted by output_move_double. */
27971 arm_count_output_move_double_insns (rtx
*operands
)
27975 /* output_move_double may modify the operands array, so call it
27976 here on a copy of the array. */
27977 ops
[0] = operands
[0];
27978 ops
[1] = operands
[1];
27979 output_move_double (ops
, false, &count
);
27984 vfp3_const_double_for_fract_bits (rtx operand
)
27986 REAL_VALUE_TYPE r0
;
27988 if (!CONST_DOUBLE_P (operand
))
27991 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
27992 if (exact_real_inverse (DFmode
, &r0
)
27993 && !REAL_VALUE_NEGATIVE (r0
))
27995 if (exact_real_truncate (DFmode
, &r0
))
27997 HOST_WIDE_INT value
= real_to_integer (&r0
);
27998 value
= value
& 0xffffffff;
27999 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28001 int ret
= exact_log2 (value
);
28002 gcc_assert (IN_RANGE (ret
, 0, 31));
28010 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28011 log2 is in [1, 32], return that log2. Otherwise return -1.
28012 This is used in the patterns for vcvt.s32.f32 floating-point to
28013 fixed-point conversions. */
28016 vfp3_const_double_for_bits (rtx x
)
28018 const REAL_VALUE_TYPE
*r
;
28020 if (!CONST_DOUBLE_P (x
))
28023 r
= CONST_DOUBLE_REAL_VALUE (x
);
28025 if (REAL_VALUE_NEGATIVE (*r
)
28026 || REAL_VALUE_ISNAN (*r
)
28027 || REAL_VALUE_ISINF (*r
)
28028 || !real_isinteger (r
, SFmode
))
28031 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
28033 /* The exact_log2 above will have returned -1 if this is
28034 not an exact log2. */
28035 if (!IN_RANGE (hwint
, 1, 32))
28042 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28045 arm_pre_atomic_barrier (enum memmodel model
)
28047 if (need_atomic_barrier_p (model
, true))
28048 emit_insn (gen_memory_barrier ());
28052 arm_post_atomic_barrier (enum memmodel model
)
28054 if (need_atomic_barrier_p (model
, false))
28055 emit_insn (gen_memory_barrier ());
28058 /* Emit the load-exclusive and store-exclusive instructions.
28059 Use acquire and release versions if necessary. */
28062 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28064 rtx (*gen
) (rtx
, rtx
);
28070 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28071 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28072 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28073 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28075 gcc_unreachable ();
28082 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28083 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
28084 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
28085 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
28087 gcc_unreachable ();
28091 emit_insn (gen (rval
, mem
));
28095 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28098 rtx (*gen
) (rtx
, rtx
, rtx
);
28104 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28105 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28106 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28107 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28109 gcc_unreachable ();
28116 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28117 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
28118 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
28119 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
28121 gcc_unreachable ();
28125 emit_insn (gen (bval
, rval
, mem
));
28128 /* Mark the previous jump instruction as unlikely. */
28131 emit_unlikely_jump (rtx insn
)
28133 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
28135 rtx_insn
*jump
= emit_jump_insn (insn
);
28136 add_int_reg_note (jump
, REG_BR_PROB
, very_unlikely
);
28139 /* Expand a compare and swap pattern. */
28142 arm_expand_compare_and_swap (rtx operands
[])
28144 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28146 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28148 bval
= operands
[0];
28149 rval
= operands
[1];
28151 oldval
= operands
[3];
28152 newval
= operands
[4];
28153 is_weak
= operands
[5];
28154 mod_s
= operands
[6];
28155 mod_f
= operands
[7];
28156 mode
= GET_MODE (mem
);
28158 /* Normally the succ memory model must be stronger than fail, but in the
28159 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28160 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28162 if (TARGET_HAVE_LDACQ
28163 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28164 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28165 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28171 /* For narrow modes, we're going to perform the comparison in SImode,
28172 so do the zero-extension now. */
28173 rval
= gen_reg_rtx (SImode
);
28174 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28178 /* Force the value into a register if needed. We waited until after
28179 the zero-extension above to do this properly. */
28180 if (!arm_add_operand (oldval
, SImode
))
28181 oldval
= force_reg (SImode
, oldval
);
28185 if (!cmpdi_operand (oldval
, mode
))
28186 oldval
= force_reg (mode
, oldval
);
28190 gcc_unreachable ();
28195 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
28196 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
28197 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
28198 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
28200 gcc_unreachable ();
28203 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CCmode
, CC_REGNUM
);
28204 emit_insn (gen (bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28206 if (mode
== QImode
|| mode
== HImode
)
28207 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28209 /* In all cases, we arrange for success to be signaled by Z set.
28210 This arrangement allows for the boolean result to be used directly
28211 in a subsequent branch, post optimization. For Thumb-1 targets, the
28212 boolean negation of the result is also stored in bval because Thumb-1
28213 backend lacks dependency tracking for CC flag due to flag-setting not
28214 being represented at RTL level. */
28216 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
28219 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
28220 emit_insn (gen_rtx_SET (bval
, x
));
28224 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28225 another memory store between the load-exclusive and store-exclusive can
28226 reset the monitor from Exclusive to Open state. This means we must wait
28227 until after reload to split the pattern, lest we get a register spill in
28228 the middle of the atomic sequence. Success of the compare and swap is
28229 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28230 for Thumb-1 targets (ie. negation of the boolean value returned by
28231 atomic_compare_and_swapmode standard pattern in operand 0). */
28234 arm_split_compare_and_swap (rtx operands
[])
28236 rtx rval
, mem
, oldval
, newval
, neg_bval
;
28238 enum memmodel mod_s
, mod_f
;
28240 rtx_code_label
*label1
, *label2
;
28243 rval
= operands
[1];
28245 oldval
= operands
[3];
28246 newval
= operands
[4];
28247 is_weak
= (operands
[5] != const0_rtx
);
28248 mod_s
= memmodel_from_int (INTVAL (operands
[6]));
28249 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
28250 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
28251 mode
= GET_MODE (mem
);
28253 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28255 bool use_acquire
= TARGET_HAVE_LDACQ
28256 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28257 || is_mm_release (mod_s
));
28259 bool use_release
= TARGET_HAVE_LDACQ
28260 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28261 || is_mm_acquire (mod_s
));
28263 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28264 a full barrier is emitted after the store-release. */
28266 use_acquire
= false;
28268 /* Checks whether a barrier is needed and emits one accordingly. */
28269 if (!(use_acquire
|| use_release
))
28270 arm_pre_atomic_barrier (mod_s
);
28275 label1
= gen_label_rtx ();
28276 emit_label (label1
);
28278 label2
= gen_label_rtx ();
28280 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28282 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28283 as required to communicate with arm_expand_compare_and_swap. */
28286 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
28287 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28288 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28289 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28290 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28294 emit_move_insn (neg_bval
, const1_rtx
);
28295 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
28296 if (thumb1_cmpneg_operand (oldval
, SImode
))
28297 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
28300 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
28303 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
28305 /* Weak or strong, we want EQ to be true for success, so that we
28306 match the flags that we got from the compare above. */
28309 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28310 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
28311 emit_insn (gen_rtx_SET (cond
, x
));
28316 /* Z is set to boolean value of !neg_bval, as required to communicate
28317 with arm_expand_compare_and_swap. */
28318 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
28319 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
28322 if (!is_mm_relaxed (mod_f
))
28323 emit_label (label2
);
28325 /* Checks whether a barrier is needed and emits one accordingly. */
28327 || !(use_acquire
|| use_release
))
28328 arm_post_atomic_barrier (mod_s
);
28330 if (is_mm_relaxed (mod_f
))
28331 emit_label (label2
);
28334 /* Split an atomic operation pattern. Operation is given by CODE and is one
28335 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28336 operation). Operation is performed on the content at MEM and on VALUE
28337 following the memory model MODEL_RTX. The content at MEM before and after
28338 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28339 success of the operation is returned in COND. Using a scratch register or
28340 an operand register for these determines what result is returned for that
28344 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28345 rtx value
, rtx model_rtx
, rtx cond
)
28347 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28348 machine_mode mode
= GET_MODE (mem
);
28349 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28350 rtx_code_label
*label
;
28351 bool all_low_regs
, bind_old_new
;
28354 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28356 bool use_acquire
= TARGET_HAVE_LDACQ
28357 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28358 || is_mm_release (model
));
28360 bool use_release
= TARGET_HAVE_LDACQ
28361 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28362 || is_mm_acquire (model
));
28364 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28365 a full barrier is emitted after the store-release. */
28367 use_acquire
= false;
28369 /* Checks whether a barrier is needed and emits one accordingly. */
28370 if (!(use_acquire
|| use_release
))
28371 arm_pre_atomic_barrier (model
);
28373 label
= gen_label_rtx ();
28374 emit_label (label
);
28377 new_out
= gen_lowpart (wmode
, new_out
);
28379 old_out
= gen_lowpart (wmode
, old_out
);
28382 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28384 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28386 /* Does the operation require destination and first operand to use the same
28387 register? This is decided by register constraints of relevant insn
28388 patterns in thumb1.md. */
28389 gcc_assert (!new_out
|| REG_P (new_out
));
28390 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
28391 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
28392 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
28397 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
28399 /* We want to return the old value while putting the result of the operation
28400 in the same register as the old value so copy the old value over to the
28401 destination register and use that register for the operation. */
28402 if (old_out
&& bind_old_new
)
28404 emit_move_insn (new_out
, old_out
);
28415 x
= gen_rtx_AND (wmode
, old_out
, value
);
28416 emit_insn (gen_rtx_SET (new_out
, x
));
28417 x
= gen_rtx_NOT (wmode
, new_out
);
28418 emit_insn (gen_rtx_SET (new_out
, x
));
28422 if (CONST_INT_P (value
))
28424 value
= GEN_INT (-INTVAL (value
));
28430 if (mode
== DImode
)
28432 /* DImode plus/minus need to clobber flags. */
28433 /* The adddi3 and subdi3 patterns are incorrectly written so that
28434 they require matching operands, even when we could easily support
28435 three operands. Thankfully, this can be fixed up post-splitting,
28436 as the individual add+adc patterns do accept three operands and
28437 post-reload cprop can make these moves go away. */
28438 emit_move_insn (new_out
, old_out
);
28440 x
= gen_adddi3 (new_out
, new_out
, value
);
28442 x
= gen_subdi3 (new_out
, new_out
, value
);
28449 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28450 emit_insn (gen_rtx_SET (new_out
, x
));
28454 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28457 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28458 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28460 /* Checks whether a barrier is needed and emits one accordingly. */
28462 || !(use_acquire
|| use_release
))
28463 arm_post_atomic_barrier (model
);
28466 #define MAX_VECT_LEN 16
28468 struct expand_vec_perm_d
28470 rtx target
, op0
, op1
;
28471 unsigned char perm
[MAX_VECT_LEN
];
28472 machine_mode vmode
;
28473 unsigned char nelt
;
28478 /* Generate a variable permutation. */
28481 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28483 machine_mode vmode
= GET_MODE (target
);
28484 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28486 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28487 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28488 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28489 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28490 gcc_checking_assert (TARGET_NEON
);
28494 if (vmode
== V8QImode
)
28495 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28497 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28503 if (vmode
== V8QImode
)
28505 pair
= gen_reg_rtx (V16QImode
);
28506 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28507 pair
= gen_lowpart (TImode
, pair
);
28508 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28512 pair
= gen_reg_rtx (OImode
);
28513 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28514 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28520 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28522 machine_mode vmode
= GET_MODE (target
);
28523 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
28524 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28525 rtx rmask
[MAX_VECT_LEN
], mask
;
28527 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28528 numbering of elements for big-endian, we must reverse the order. */
28529 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28531 /* The VTBL instruction does not use a modulo index, so we must take care
28532 of that ourselves. */
28533 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28534 for (i
= 0; i
< nelt
; ++i
)
28536 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
28537 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28539 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28542 /* Map lane ordering between architectural lane order, and GCC lane order,
28543 taking into account ABI. See comment above output_move_neon for details. */
28546 neon_endian_lane_map (machine_mode mode
, int lane
)
28548 if (BYTES_BIG_ENDIAN
)
28550 int nelems
= GET_MODE_NUNITS (mode
);
28551 /* Reverse lane order. */
28552 lane
= (nelems
- 1 - lane
);
28553 /* Reverse D register order, to match ABI. */
28554 if (GET_MODE_SIZE (mode
) == 16)
28555 lane
= lane
^ (nelems
/ 2);
28560 /* Some permutations index into pairs of vectors, this is a helper function
28561 to map indexes into those pairs of vectors. */
28564 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
28566 int nelem
= GET_MODE_NUNITS (mode
);
28567 if (BYTES_BIG_ENDIAN
)
28569 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
28573 /* Generate or test for an insn that supports a constant permutation. */
28575 /* Recognize patterns for the VUZP insns. */
28578 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28580 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28581 rtx out0
, out1
, in0
, in1
;
28582 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28586 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28589 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28590 big endian pattern on 64 bit vectors, so we correct for that. */
28591 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28592 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
28594 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28596 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28598 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28602 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28604 for (i
= 0; i
< nelt
; i
++)
28607 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28608 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
28618 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28619 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28620 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28621 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28622 case V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
28623 case V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
28624 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28625 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28626 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28627 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28629 gcc_unreachable ();
28634 if (swap_nelt
!= 0)
28635 std::swap (in0
, in1
);
28638 out1
= gen_reg_rtx (d
->vmode
);
28640 std::swap (out0
, out1
);
28642 emit_insn (gen (out0
, in0
, in1
, out1
));
28646 /* Recognize patterns for the VZIP insns. */
28649 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28651 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
28652 rtx out0
, out1
, in0
, in1
;
28653 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28657 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28660 is_swapped
= BYTES_BIG_ENDIAN
;
28662 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
28665 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
28667 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28671 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28673 for (i
= 0; i
< nelt
/ 2; i
++)
28676 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
28677 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
28681 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
28682 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
28693 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28694 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28695 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28696 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28697 case V8HFmode
: gen
= gen_neon_vzipv8hf_internal
; break;
28698 case V4HFmode
: gen
= gen_neon_vzipv4hf_internal
; break;
28699 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28700 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28701 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28702 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28704 gcc_unreachable ();
28710 std::swap (in0
, in1
);
28713 out1
= gen_reg_rtx (d
->vmode
);
28715 std::swap (out0
, out1
);
28717 emit_insn (gen (out0
, in0
, in1
, out1
));
28721 /* Recognize patterns for the VREV insns. */
28724 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
28726 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
28727 rtx (*gen
)(rtx
, rtx
);
28729 if (!d
->one_vector_p
)
28738 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28739 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28747 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28748 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28749 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28750 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28751 case V8HFmode
: gen
= gen_neon_vrev64v8hf
; break;
28752 case V4HFmode
: gen
= gen_neon_vrev64v4hf
; break;
28760 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
28761 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
28762 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
28763 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
28764 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
28765 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
28766 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
28767 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
28776 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
28777 for (j
= 0; j
<= diff
; j
+= 1)
28779 /* This is guaranteed to be true as the value of diff
28780 is 7, 3, 1 and we should have enough elements in the
28781 queue to generate this. Getting a vector mask with a
28782 value of diff other than these values implies that
28783 something is wrong by the time we get here. */
28784 gcc_assert (i
+ j
< nelt
);
28785 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
28793 emit_insn (gen (d
->target
, d
->op0
));
28797 /* Recognize patterns for the VTRN insns. */
28800 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
28802 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28803 rtx out0
, out1
, in0
, in1
;
28804 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28806 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28809 /* Note that these are little-endian tests. Adjust for big-endian later. */
28810 if (d
->perm
[0] == 0)
28812 else if (d
->perm
[0] == 1)
28816 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28818 for (i
= 0; i
< nelt
; i
+= 2)
28820 if (d
->perm
[i
] != i
+ odd
)
28822 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28832 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28833 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28834 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28835 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28836 case V8HFmode
: gen
= gen_neon_vtrnv8hf_internal
; break;
28837 case V4HFmode
: gen
= gen_neon_vtrnv4hf_internal
; break;
28838 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
28839 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
28840 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
28841 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
28843 gcc_unreachable ();
28848 if (BYTES_BIG_ENDIAN
)
28850 std::swap (in0
, in1
);
28855 out1
= gen_reg_rtx (d
->vmode
);
28857 std::swap (out0
, out1
);
28859 emit_insn (gen (out0
, in0
, in1
, out1
));
28863 /* Recognize patterns for the VEXT insns. */
28866 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
28868 unsigned int i
, nelt
= d
->nelt
;
28869 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
28872 unsigned int location
;
28874 unsigned int next
= d
->perm
[0] + 1;
28876 /* TODO: Handle GCC's numbering of elements for big-endian. */
28877 if (BYTES_BIG_ENDIAN
)
28880 /* Check if the extracted indexes are increasing by one. */
28881 for (i
= 1; i
< nelt
; next
++, i
++)
28883 /* If we hit the most significant element of the 2nd vector in
28884 the previous iteration, no need to test further. */
28885 if (next
== 2 * nelt
)
28888 /* If we are operating on only one vector: it could be a
28889 rotation. If there are only two elements of size < 64, let
28890 arm_evpc_neon_vrev catch it. */
28891 if (d
->one_vector_p
&& (next
== nelt
))
28893 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
28899 if (d
->perm
[i
] != next
)
28903 location
= d
->perm
[0];
28907 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
28908 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
28909 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
28910 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
28911 case V2SImode
: gen
= gen_neon_vextv2si
; break;
28912 case V4SImode
: gen
= gen_neon_vextv4si
; break;
28913 case V4HFmode
: gen
= gen_neon_vextv4hf
; break;
28914 case V8HFmode
: gen
= gen_neon_vextv8hf
; break;
28915 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
28916 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
28917 case V2DImode
: gen
= gen_neon_vextv2di
; break;
28926 offset
= GEN_INT (location
);
28927 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
28931 /* The NEON VTBL instruction is a fully variable permuation that's even
28932 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28933 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28934 can do slightly better by expanding this as a constant where we don't
28935 have to apply a mask. */
28938 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
28940 rtx rperm
[MAX_VECT_LEN
], sel
;
28941 machine_mode vmode
= d
->vmode
;
28942 unsigned int i
, nelt
= d
->nelt
;
28944 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28945 numbering of elements for big-endian, we must reverse the order. */
28946 if (BYTES_BIG_ENDIAN
)
28952 /* Generic code will try constant permutation twice. Once with the
28953 original mode and again with the elements lowered to QImode.
28954 So wait and don't do the selector expansion ourselves. */
28955 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
28958 for (i
= 0; i
< nelt
; ++i
)
28959 rperm
[i
] = GEN_INT (d
->perm
[i
]);
28960 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
28961 sel
= force_reg (vmode
, sel
);
28963 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
28968 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
28970 /* Check if the input mask matches vext before reordering the
28973 if (arm_evpc_neon_vext (d
))
28976 /* The pattern matching functions above are written to look for a small
28977 number to begin the sequence (0, 1, N/2). If we begin with an index
28978 from the second operand, we can swap the operands. */
28979 if (d
->perm
[0] >= d
->nelt
)
28981 unsigned i
, nelt
= d
->nelt
;
28983 for (i
= 0; i
< nelt
; ++i
)
28984 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
28986 std::swap (d
->op0
, d
->op1
);
28991 if (arm_evpc_neon_vuzp (d
))
28993 if (arm_evpc_neon_vzip (d
))
28995 if (arm_evpc_neon_vrev (d
))
28997 if (arm_evpc_neon_vtrn (d
))
28999 return arm_evpc_neon_vtbl (d
);
29004 /* Expand a vec_perm_const pattern. */
29007 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29009 struct expand_vec_perm_d d
;
29010 int i
, nelt
, which
;
29016 d
.vmode
= GET_MODE (target
);
29017 gcc_assert (VECTOR_MODE_P (d
.vmode
));
29018 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29019 d
.testing_p
= false;
29021 for (i
= which
= 0; i
< nelt
; ++i
)
29023 rtx e
= XVECEXP (sel
, 0, i
);
29024 int ei
= INTVAL (e
) & (2 * nelt
- 1);
29025 which
|= (ei
< nelt
? 1 : 2);
29035 d
.one_vector_p
= false;
29036 if (!rtx_equal_p (op0
, op1
))
29039 /* The elements of PERM do not suggest that only the first operand
29040 is used, but both operands are identical. Allow easier matching
29041 of the permutation by folding the permutation into the single
29045 for (i
= 0; i
< nelt
; ++i
)
29046 d
.perm
[i
] &= nelt
- 1;
29048 d
.one_vector_p
= true;
29053 d
.one_vector_p
= true;
29057 return arm_expand_vec_perm_const_1 (&d
);
29060 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29063 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
29064 const unsigned char *sel
)
29066 struct expand_vec_perm_d d
;
29067 unsigned int i
, nelt
, which
;
29071 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29072 d
.testing_p
= true;
29073 memcpy (d
.perm
, sel
, nelt
);
29075 /* Categorize the set of elements in the selector. */
29076 for (i
= which
= 0; i
< nelt
; ++i
)
29078 unsigned char e
= d
.perm
[i
];
29079 gcc_assert (e
< 2 * nelt
);
29080 which
|= (e
< nelt
? 1 : 2);
29083 /* For all elements from second vector, fold the elements to first. */
29085 for (i
= 0; i
< nelt
; ++i
)
29088 /* Check whether the mask can be applied to the vector type. */
29089 d
.one_vector_p
= (which
!= 3);
29091 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
29092 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
29093 if (!d
.one_vector_p
)
29094 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
29097 ret
= arm_expand_vec_perm_const_1 (&d
);
29104 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
29106 /* If we are soft float and we do not have ldrd
29107 then all auto increment forms are ok. */
29108 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
29113 /* Post increment and Pre Decrement are supported for all
29114 instruction forms except for vector forms. */
29117 if (VECTOR_MODE_P (mode
))
29119 if (code
!= ARM_PRE_DEC
)
29129 /* Without LDRD and mode size greater than
29130 word size, there is no point in auto-incrementing
29131 because ldm and stm will not have these forms. */
29132 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29135 /* Vector and floating point modes do not support
29136 these auto increment forms. */
29137 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29150 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29151 on ARM, since we know that shifts by negative amounts are no-ops.
29152 Additionally, the default expansion code is not available or suitable
29153 for post-reload insn splits (this can occur when the register allocator
29154 chooses not to do a shift in NEON).
29156 This function is used in both initial expand and post-reload splits, and
29157 handles all kinds of 64-bit shifts.
29159 Input requirements:
29160 - It is safe for the input and output to be the same register, but
29161 early-clobber rules apply for the shift amount and scratch registers.
29162 - Shift by register requires both scratch registers. In all other cases
29163 the scratch registers may be NULL.
29164 - Ashiftrt by a register also clobbers the CC register. */
29166 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29167 rtx amount
, rtx scratch1
, rtx scratch2
)
29169 rtx out_high
= gen_highpart (SImode
, out
);
29170 rtx out_low
= gen_lowpart (SImode
, out
);
29171 rtx in_high
= gen_highpart (SImode
, in
);
29172 rtx in_low
= gen_lowpart (SImode
, in
);
29175 in = the register pair containing the input value.
29176 out = the destination register pair.
29177 up = the high- or low-part of each pair.
29178 down = the opposite part to "up".
29179 In a shift, we can consider bits to shift from "up"-stream to
29180 "down"-stream, so in a left-shift "up" is the low-part and "down"
29181 is the high-part of each register pair. */
29183 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29184 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29185 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29186 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29188 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29190 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29191 && GET_MODE (out
) == DImode
);
29193 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29194 && GET_MODE (in
) == DImode
);
29196 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29197 && GET_MODE (amount
) == SImode
)
29198 || CONST_INT_P (amount
)));
29199 gcc_assert (scratch1
== NULL
29200 || (GET_CODE (scratch1
) == SCRATCH
)
29201 || (GET_MODE (scratch1
) == SImode
29202 && REG_P (scratch1
)));
29203 gcc_assert (scratch2
== NULL
29204 || (GET_CODE (scratch2
) == SCRATCH
)
29205 || (GET_MODE (scratch2
) == SImode
29206 && REG_P (scratch2
)));
29207 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29208 || !HARD_REGISTER_P (out
)
29209 || (REGNO (out
) != REGNO (amount
)
29210 && REGNO (out
) + 1 != REGNO (amount
)));
29212 /* Macros to make following code more readable. */
29213 #define SUB_32(DEST,SRC) \
29214 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29215 #define RSB_32(DEST,SRC) \
29216 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29217 #define SUB_S_32(DEST,SRC) \
29218 gen_addsi3_compare0 ((DEST), (SRC), \
29220 #define SET(DEST,SRC) \
29221 gen_rtx_SET ((DEST), (SRC))
29222 #define SHIFT(CODE,SRC,AMOUNT) \
29223 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29224 #define LSHIFT(CODE,SRC,AMOUNT) \
29225 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29226 SImode, (SRC), (AMOUNT))
29227 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29228 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29229 SImode, (SRC), (AMOUNT))
29231 gen_rtx_IOR (SImode, (A), (B))
29232 #define BRANCH(COND,LABEL) \
29233 gen_arm_cond_branch ((LABEL), \
29234 gen_rtx_ ## COND (CCmode, cc_reg, \
29238 /* Shifts by register and shifts by constant are handled separately. */
29239 if (CONST_INT_P (amount
))
29241 /* We have a shift-by-constant. */
29243 /* First, handle out-of-range shift amounts.
29244 In both cases we try to match the result an ARM instruction in a
29245 shift-by-register would give. This helps reduce execution
29246 differences between optimization levels, but it won't stop other
29247 parts of the compiler doing different things. This is "undefined
29248 behavior, in any case. */
29249 if (INTVAL (amount
) <= 0)
29250 emit_insn (gen_movdi (out
, in
));
29251 else if (INTVAL (amount
) >= 64)
29253 if (code
== ASHIFTRT
)
29255 rtx const31_rtx
= GEN_INT (31);
29256 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29257 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29260 emit_insn (gen_movdi (out
, const0_rtx
));
29263 /* Now handle valid shifts. */
29264 else if (INTVAL (amount
) < 32)
29266 /* Shifts by a constant less than 32. */
29267 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29269 /* Clearing the out register in DImode first avoids lots
29270 of spilling and results in less stack usage.
29271 Later this redundant insn is completely removed.
29272 Do that only if "in" and "out" are different registers. */
29273 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29274 emit_insn (SET (out
, const0_rtx
));
29275 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29276 emit_insn (SET (out_down
,
29277 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29279 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29283 /* Shifts by a constant greater than 31. */
29284 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29286 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29287 emit_insn (SET (out
, const0_rtx
));
29288 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29289 if (code
== ASHIFTRT
)
29290 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29293 emit_insn (SET (out_up
, const0_rtx
));
29298 /* We have a shift-by-register. */
29299 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29301 /* This alternative requires the scratch registers. */
29302 gcc_assert (scratch1
&& REG_P (scratch1
));
29303 gcc_assert (scratch2
&& REG_P (scratch2
));
29305 /* We will need the values "amount-32" and "32-amount" later.
29306 Swapping them around now allows the later code to be more general. */
29310 emit_insn (SUB_32 (scratch1
, amount
));
29311 emit_insn (RSB_32 (scratch2
, amount
));
29314 emit_insn (RSB_32 (scratch1
, amount
));
29315 /* Also set CC = amount > 32. */
29316 emit_insn (SUB_S_32 (scratch2
, amount
));
29319 emit_insn (RSB_32 (scratch1
, amount
));
29320 emit_insn (SUB_32 (scratch2
, amount
));
29323 gcc_unreachable ();
29326 /* Emit code like this:
29329 out_down = in_down << amount;
29330 out_down = (in_up << (amount - 32)) | out_down;
29331 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29332 out_up = in_up << amount;
29335 out_down = in_down >> amount;
29336 out_down = (in_up << (32 - amount)) | out_down;
29338 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29339 out_up = in_up << amount;
29342 out_down = in_down >> amount;
29343 out_down = (in_up << (32 - amount)) | out_down;
29345 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29346 out_up = in_up << amount;
29348 The ARM and Thumb2 variants are the same but implemented slightly
29349 differently. If this were only called during expand we could just
29350 use the Thumb2 case and let combine do the right thing, but this
29351 can also be called from post-reload splitters. */
29353 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29355 if (!TARGET_THUMB2
)
29357 /* Emit code for ARM mode. */
29358 emit_insn (SET (out_down
,
29359 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29360 if (code
== ASHIFTRT
)
29362 rtx_code_label
*done_label
= gen_label_rtx ();
29363 emit_jump_insn (BRANCH (LT
, done_label
));
29364 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29366 emit_label (done_label
);
29369 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29374 /* Emit code for Thumb2 mode.
29375 Thumb2 can't do shift and or in one insn. */
29376 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29377 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29379 if (code
== ASHIFTRT
)
29381 rtx_code_label
*done_label
= gen_label_rtx ();
29382 emit_jump_insn (BRANCH (LT
, done_label
));
29383 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29384 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29385 emit_label (done_label
);
29389 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29390 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29394 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29408 /* Returns true if the pattern is a valid symbolic address, which is either a
29409 symbol_ref or (symbol_ref + addend).
29411 According to the ARM ELF ABI, the initial addend of REL-type relocations
29412 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29413 literal field of the instruction as a 16-bit signed value in the range
29414 -32768 <= A < 32768. */
29417 arm_valid_symbolic_address_p (rtx addr
)
29419 rtx xop0
, xop1
= NULL_RTX
;
29422 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29425 /* (const (plus: symbol_ref const_int)) */
29426 if (GET_CODE (addr
) == CONST
)
29427 tmp
= XEXP (addr
, 0);
29429 if (GET_CODE (tmp
) == PLUS
)
29431 xop0
= XEXP (tmp
, 0);
29432 xop1
= XEXP (tmp
, 1);
29434 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29435 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29441 /* Returns true if a valid comparison operation and makes
29442 the operands in a form that is valid. */
29444 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29446 enum rtx_code code
= GET_CODE (*comparison
);
29448 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29449 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29451 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29453 if (code
== UNEQ
|| code
== LTGT
)
29456 code_int
= (int)code
;
29457 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29458 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29463 if (!arm_add_operand (*op1
, mode
))
29464 *op1
= force_reg (mode
, *op1
);
29465 if (!arm_add_operand (*op2
, mode
))
29466 *op2
= force_reg (mode
, *op2
);
29470 if (!cmpdi_operand (*op1
, mode
))
29471 *op1
= force_reg (mode
, *op1
);
29472 if (!cmpdi_operand (*op2
, mode
))
29473 *op2
= force_reg (mode
, *op2
);
29477 if (!TARGET_VFP_FP16INST
)
29479 /* FP16 comparisons are done in SF mode. */
29481 *op1
= convert_to_mode (mode
, *op1
, 1);
29482 *op2
= convert_to_mode (mode
, *op2
, 1);
29483 /* Fall through. */
29486 if (!vfp_compare_operand (*op1
, mode
))
29487 *op1
= force_reg (mode
, *op1
);
29488 if (!vfp_compare_operand (*op2
, mode
))
29489 *op2
= force_reg (mode
, *op2
);
29499 /* Maximum number of instructions to set block of memory. */
29501 arm_block_set_max_insns (void)
29503 if (optimize_function_for_size_p (cfun
))
29506 return current_tune
->max_insns_inline_memset
;
29509 /* Return TRUE if it's profitable to set block of memory for
29510 non-vectorized case. VAL is the value to set the memory
29511 with. LENGTH is the number of bytes to set. ALIGN is the
29512 alignment of the destination memory in bytes. UNALIGNED_P
29513 is TRUE if we can only set the memory with instructions
29514 meeting alignment requirements. USE_STRD_P is TRUE if we
29515 can use strd to set the memory. */
29517 arm_block_set_non_vect_profit_p (rtx val
,
29518 unsigned HOST_WIDE_INT length
,
29519 unsigned HOST_WIDE_INT align
,
29520 bool unaligned_p
, bool use_strd_p
)
29523 /* For leftovers in bytes of 0-7, we can set the memory block using
29524 strb/strh/str with minimum instruction number. */
29525 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29529 num
= arm_const_inline_cost (SET
, val
);
29530 num
+= length
/ align
+ length
% align
;
29532 else if (use_strd_p
)
29534 num
= arm_const_double_inline_cost (val
);
29535 num
+= (length
>> 3) + leftover
[length
& 7];
29539 num
= arm_const_inline_cost (SET
, val
);
29540 num
+= (length
>> 2) + leftover
[length
& 3];
29543 /* We may be able to combine last pair STRH/STRB into a single STR
29544 by shifting one byte back. */
29545 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29548 return (num
<= arm_block_set_max_insns ());
29551 /* Return TRUE if it's profitable to set block of memory for
29552 vectorized case. LENGTH is the number of bytes to set.
29553 ALIGN is the alignment of destination memory in bytes.
29554 MODE is the vector mode used to set the memory. */
29556 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
29557 unsigned HOST_WIDE_INT align
,
29561 bool unaligned_p
= ((align
& 3) != 0);
29562 unsigned int nelt
= GET_MODE_NUNITS (mode
);
29564 /* Instruction loading constant value. */
29566 /* Instructions storing the memory. */
29567 num
+= (length
+ nelt
- 1) / nelt
;
29568 /* Instructions adjusting the address expression. Only need to
29569 adjust address expression if it's 4 bytes aligned and bytes
29570 leftover can only be stored by mis-aligned store instruction. */
29571 if (!unaligned_p
&& (length
& 3) != 0)
29574 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29575 if (!unaligned_p
&& mode
== V16QImode
)
29578 return (num
<= arm_block_set_max_insns ());
29581 /* Set a block of memory using vectorization instructions for the
29582 unaligned case. We fill the first LENGTH bytes of the memory
29583 area starting from DSTBASE with byte constant VALUE. ALIGN is
29584 the alignment requirement of memory. Return TRUE if succeeded. */
29586 arm_block_set_unaligned_vect (rtx dstbase
,
29587 unsigned HOST_WIDE_INT length
,
29588 unsigned HOST_WIDE_INT value
,
29589 unsigned HOST_WIDE_INT align
)
29591 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
29593 rtx val_elt
, val_vec
, reg
;
29594 rtx rval
[MAX_VECT_LEN
];
29595 rtx (*gen_func
) (rtx
, rtx
);
29597 unsigned HOST_WIDE_INT v
= value
;
29598 unsigned int offset
= 0;
29599 gcc_assert ((align
& 0x3) != 0);
29600 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29601 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29602 if (length
>= nelt_v16
)
29605 gen_func
= gen_movmisalignv16qi
;
29610 gen_func
= gen_movmisalignv8qi
;
29612 nelt_mode
= GET_MODE_NUNITS (mode
);
29613 gcc_assert (length
>= nelt_mode
);
29614 /* Skip if it isn't profitable. */
29615 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29618 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29619 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29621 v
= sext_hwi (v
, BITS_PER_WORD
);
29622 val_elt
= GEN_INT (v
);
29623 for (j
= 0; j
< nelt_mode
; j
++)
29626 reg
= gen_reg_rtx (mode
);
29627 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29628 /* Emit instruction loading the constant value. */
29629 emit_move_insn (reg
, val_vec
);
29631 /* Handle nelt_mode bytes in a vector. */
29632 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29634 emit_insn ((*gen_func
) (mem
, reg
));
29635 if (i
+ 2 * nelt_mode
<= length
)
29637 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
29638 offset
+= nelt_mode
;
29639 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29643 /* If there are not less than nelt_v8 bytes leftover, we must be in
29645 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
29647 /* Handle (8, 16) bytes leftover. */
29648 if (i
+ nelt_v8
< length
)
29650 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
29651 offset
+= length
- i
;
29652 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29654 /* We are shifting bytes back, set the alignment accordingly. */
29655 if ((length
& 1) != 0 && align
>= 2)
29656 set_mem_align (mem
, BITS_PER_UNIT
);
29658 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29660 /* Handle (0, 8] bytes leftover. */
29661 else if (i
< length
&& i
+ nelt_v8
>= length
)
29663 if (mode
== V16QImode
)
29664 reg
= gen_lowpart (V8QImode
, reg
);
29666 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
29667 + (nelt_mode
- nelt_v8
))));
29668 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
29669 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
29671 /* We are shifting bytes back, set the alignment accordingly. */
29672 if ((length
& 1) != 0 && align
>= 2)
29673 set_mem_align (mem
, BITS_PER_UNIT
);
29675 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29681 /* Set a block of memory using vectorization instructions for the
29682 aligned case. We fill the first LENGTH bytes of the memory area
29683 starting from DSTBASE with byte constant VALUE. ALIGN is the
29684 alignment requirement of memory. Return TRUE if succeeded. */
29686 arm_block_set_aligned_vect (rtx dstbase
,
29687 unsigned HOST_WIDE_INT length
,
29688 unsigned HOST_WIDE_INT value
,
29689 unsigned HOST_WIDE_INT align
)
29691 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
29692 rtx dst
, addr
, mem
;
29693 rtx val_elt
, val_vec
, reg
;
29694 rtx rval
[MAX_VECT_LEN
];
29696 unsigned HOST_WIDE_INT v
= value
;
29697 unsigned int offset
= 0;
29699 gcc_assert ((align
& 0x3) == 0);
29700 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29701 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29702 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
29707 nelt_mode
= GET_MODE_NUNITS (mode
);
29708 gcc_assert (length
>= nelt_mode
);
29709 /* Skip if it isn't profitable. */
29710 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29713 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29715 v
= sext_hwi (v
, BITS_PER_WORD
);
29716 val_elt
= GEN_INT (v
);
29717 for (j
= 0; j
< nelt_mode
; j
++)
29720 reg
= gen_reg_rtx (mode
);
29721 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29722 /* Emit instruction loading the constant value. */
29723 emit_move_insn (reg
, val_vec
);
29726 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29727 if (mode
== V16QImode
)
29729 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29730 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29732 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29733 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
29735 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29736 offset
+= length
- nelt_mode
;
29737 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29738 /* We are shifting bytes back, set the alignment accordingly. */
29739 if ((length
& 0x3) == 0)
29740 set_mem_align (mem
, BITS_PER_UNIT
* 4);
29741 else if ((length
& 0x1) == 0)
29742 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29744 set_mem_align (mem
, BITS_PER_UNIT
);
29746 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29749 /* Fall through for bytes leftover. */
29751 nelt_mode
= GET_MODE_NUNITS (mode
);
29752 reg
= gen_lowpart (V8QImode
, reg
);
29755 /* Handle 8 bytes in a vector. */
29756 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29758 addr
= plus_constant (Pmode
, dst
, i
);
29759 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
29760 emit_move_insn (mem
, reg
);
29763 /* Handle single word leftover by shifting 4 bytes back. We can
29764 use aligned access for this case. */
29765 if (i
+ UNITS_PER_WORD
== length
)
29767 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
29768 offset
+= i
- UNITS_PER_WORD
;
29769 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
29770 /* We are shifting 4 bytes back, set the alignment accordingly. */
29771 if (align
> UNITS_PER_WORD
)
29772 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
29774 emit_move_insn (mem
, reg
);
29776 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29777 We have to use unaligned access for this case. */
29778 else if (i
< length
)
29780 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29781 offset
+= length
- nelt_mode
;
29782 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29783 /* We are shifting bytes back, set the alignment accordingly. */
29784 if ((length
& 1) == 0)
29785 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29787 set_mem_align (mem
, BITS_PER_UNIT
);
29789 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29795 /* Set a block of memory using plain strh/strb instructions, only
29796 using instructions allowed by ALIGN on processor. We fill the
29797 first LENGTH bytes of the memory area starting from DSTBASE
29798 with byte constant VALUE. ALIGN is the alignment requirement
29801 arm_block_set_unaligned_non_vect (rtx dstbase
,
29802 unsigned HOST_WIDE_INT length
,
29803 unsigned HOST_WIDE_INT value
,
29804 unsigned HOST_WIDE_INT align
)
29807 rtx dst
, addr
, mem
;
29808 rtx val_exp
, val_reg
, reg
;
29810 HOST_WIDE_INT v
= value
;
29812 gcc_assert (align
== 1 || align
== 2);
29815 v
|= (value
<< BITS_PER_UNIT
);
29817 v
= sext_hwi (v
, BITS_PER_WORD
);
29818 val_exp
= GEN_INT (v
);
29819 /* Skip if it isn't profitable. */
29820 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29821 align
, true, false))
29824 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29825 mode
= (align
== 2 ? HImode
: QImode
);
29826 val_reg
= force_reg (SImode
, val_exp
);
29827 reg
= gen_lowpart (mode
, val_reg
);
29829 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
29831 addr
= plus_constant (Pmode
, dst
, i
);
29832 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
29833 emit_move_insn (mem
, reg
);
29836 /* Handle single byte leftover. */
29837 if (i
+ 1 == length
)
29839 reg
= gen_lowpart (QImode
, val_reg
);
29840 addr
= plus_constant (Pmode
, dst
, i
);
29841 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29842 emit_move_insn (mem
, reg
);
29846 gcc_assert (i
== length
);
29850 /* Set a block of memory using plain strd/str/strh/strb instructions,
29851 to permit unaligned copies on processors which support unaligned
29852 semantics for those instructions. We fill the first LENGTH bytes
29853 of the memory area starting from DSTBASE with byte constant VALUE.
29854 ALIGN is the alignment requirement of memory. */
29856 arm_block_set_aligned_non_vect (rtx dstbase
,
29857 unsigned HOST_WIDE_INT length
,
29858 unsigned HOST_WIDE_INT value
,
29859 unsigned HOST_WIDE_INT align
)
29862 rtx dst
, addr
, mem
;
29863 rtx val_exp
, val_reg
, reg
;
29864 unsigned HOST_WIDE_INT v
;
29867 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
29868 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
29870 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
29871 if (length
< UNITS_PER_WORD
)
29872 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
29875 v
|= (v
<< BITS_PER_WORD
);
29877 v
= sext_hwi (v
, BITS_PER_WORD
);
29879 val_exp
= GEN_INT (v
);
29880 /* Skip if it isn't profitable. */
29881 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29882 align
, false, use_strd_p
))
29887 /* Try without strd. */
29888 v
= (v
>> BITS_PER_WORD
);
29889 v
= sext_hwi (v
, BITS_PER_WORD
);
29890 val_exp
= GEN_INT (v
);
29891 use_strd_p
= false;
29892 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29893 align
, false, use_strd_p
))
29898 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29899 /* Handle double words using strd if possible. */
29902 val_reg
= force_reg (DImode
, val_exp
);
29904 for (; (i
+ 8 <= length
); i
+= 8)
29906 addr
= plus_constant (Pmode
, dst
, i
);
29907 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
29908 emit_move_insn (mem
, reg
);
29912 val_reg
= force_reg (SImode
, val_exp
);
29914 /* Handle words. */
29915 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
29916 for (; (i
+ 4 <= length
); i
+= 4)
29918 addr
= plus_constant (Pmode
, dst
, i
);
29919 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
29920 if ((align
& 3) == 0)
29921 emit_move_insn (mem
, reg
);
29923 emit_insn (gen_unaligned_storesi (mem
, reg
));
29926 /* Merge last pair of STRH and STRB into a STR if possible. */
29927 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
29929 addr
= plus_constant (Pmode
, dst
, i
- 1);
29930 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
29931 /* We are shifting one byte back, set the alignment accordingly. */
29932 if ((align
& 1) == 0)
29933 set_mem_align (mem
, BITS_PER_UNIT
);
29935 /* Most likely this is an unaligned access, and we can't tell at
29936 compilation time. */
29937 emit_insn (gen_unaligned_storesi (mem
, reg
));
29941 /* Handle half word leftover. */
29942 if (i
+ 2 <= length
)
29944 reg
= gen_lowpart (HImode
, val_reg
);
29945 addr
= plus_constant (Pmode
, dst
, i
);
29946 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
29947 if ((align
& 1) == 0)
29948 emit_move_insn (mem
, reg
);
29950 emit_insn (gen_unaligned_storehi (mem
, reg
));
29955 /* Handle single byte leftover. */
29956 if (i
+ 1 == length
)
29958 reg
= gen_lowpart (QImode
, val_reg
);
29959 addr
= plus_constant (Pmode
, dst
, i
);
29960 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29961 emit_move_insn (mem
, reg
);
29967 /* Set a block of memory using vectorization instructions for both
29968 aligned and unaligned cases. We fill the first LENGTH bytes of
29969 the memory area starting from DSTBASE with byte constant VALUE.
29970 ALIGN is the alignment requirement of memory. */
29972 arm_block_set_vect (rtx dstbase
,
29973 unsigned HOST_WIDE_INT length
,
29974 unsigned HOST_WIDE_INT value
,
29975 unsigned HOST_WIDE_INT align
)
29977 /* Check whether we need to use unaligned store instruction. */
29978 if (((align
& 3) != 0 || (length
& 3) != 0)
29979 /* Check whether unaligned store instruction is available. */
29980 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
29983 if ((align
& 3) == 0)
29984 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
29986 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
29989 /* Expand string store operation. Firstly we try to do that by using
29990 vectorization instructions, then try with ARM unaligned access and
29991 double-word store if profitable. OPERANDS[0] is the destination,
29992 OPERANDS[1] is the number of bytes, operands[2] is the value to
29993 initialize the memory, OPERANDS[3] is the known alignment of the
29996 arm_gen_setmem (rtx
*operands
)
29998 rtx dstbase
= operands
[0];
29999 unsigned HOST_WIDE_INT length
;
30000 unsigned HOST_WIDE_INT value
;
30001 unsigned HOST_WIDE_INT align
;
30003 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
30006 length
= UINTVAL (operands
[1]);
30010 value
= (UINTVAL (operands
[2]) & 0xFF);
30011 align
= UINTVAL (operands
[3]);
30012 if (TARGET_NEON
&& length
>= 8
30013 && current_tune
->string_ops_prefer_neon
30014 && arm_block_set_vect (dstbase
, length
, value
, align
))
30017 if (!unaligned_access
&& (align
& 3) != 0)
30018 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
30020 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
30025 arm_macro_fusion_p (void)
30027 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
30030 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30031 for MOVW / MOVT macro fusion. */
30034 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
30036 /* We are trying to fuse
30037 movw imm / movt imm
30038 instructions as a group that gets scheduled together. */
30040 rtx set_dest
= SET_DEST (curr_set
);
30042 if (GET_MODE (set_dest
) != SImode
)
30045 /* We are trying to match:
30046 prev (movw) == (set (reg r0) (const_int imm16))
30047 curr (movt) == (set (zero_extract (reg r0)
30050 (const_int imm16_1))
30052 prev (movw) == (set (reg r1)
30053 (high (symbol_ref ("SYM"))))
30054 curr (movt) == (set (reg r0)
30056 (symbol_ref ("SYM")))) */
30058 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
30060 if (CONST_INT_P (SET_SRC (curr_set
))
30061 && CONST_INT_P (SET_SRC (prev_set
))
30062 && REG_P (XEXP (set_dest
, 0))
30063 && REG_P (SET_DEST (prev_set
))
30064 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
30068 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
30069 && REG_P (SET_DEST (curr_set
))
30070 && REG_P (SET_DEST (prev_set
))
30071 && GET_CODE (SET_SRC (prev_set
)) == HIGH
30072 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
30079 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
30081 rtx prev_set
= single_set (prev
);
30082 rtx curr_set
= single_set (curr
);
30088 if (any_condjump_p (curr
))
30091 if (!arm_macro_fusion_p ())
30094 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
30095 && aarch_crypto_can_dual_issue (prev
, curr
))
30098 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
30099 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
30105 /* Return true iff the instruction fusion described by OP is enabled. */
30107 arm_fusion_enabled_p (tune_params::fuse_ops op
)
30109 return current_tune
->fusible_ops
& op
;
30112 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30113 scheduled for speculative execution. Reject the long-running division
30114 and square-root instructions. */
30117 arm_sched_can_speculate_insn (rtx_insn
*insn
)
30119 switch (get_attr_type (insn
))
30127 case TYPE_NEON_FP_SQRT_S
:
30128 case TYPE_NEON_FP_SQRT_D
:
30129 case TYPE_NEON_FP_SQRT_S_Q
:
30130 case TYPE_NEON_FP_SQRT_D_Q
:
30131 case TYPE_NEON_FP_DIV_S
:
30132 case TYPE_NEON_FP_DIV_D
:
30133 case TYPE_NEON_FP_DIV_S_Q
:
30134 case TYPE_NEON_FP_DIV_D_Q
:
30141 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30143 static unsigned HOST_WIDE_INT
30144 arm_asan_shadow_offset (void)
30146 return HOST_WIDE_INT_1U
<< 29;
30150 /* This is a temporary fix for PR60655. Ideally we need
30151 to handle most of these cases in the generic part but
30152 currently we reject minus (..) (sym_ref). We try to
30153 ameliorate the case with minus (sym_ref1) (sym_ref2)
30154 where they are in the same section. */
30157 arm_const_not_ok_for_debug_p (rtx p
)
30159 tree decl_op0
= NULL
;
30160 tree decl_op1
= NULL
;
30162 if (GET_CODE (p
) == MINUS
)
30164 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
30166 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
30168 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
30169 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
30171 if ((VAR_P (decl_op1
)
30172 || TREE_CODE (decl_op1
) == CONST_DECL
)
30173 && (VAR_P (decl_op0
)
30174 || TREE_CODE (decl_op0
) == CONST_DECL
))
30175 return (get_variable_section (decl_op1
, false)
30176 != get_variable_section (decl_op0
, false));
30178 if (TREE_CODE (decl_op1
) == LABEL_DECL
30179 && TREE_CODE (decl_op0
) == LABEL_DECL
)
30180 return (DECL_CONTEXT (decl_op1
)
30181 != DECL_CONTEXT (decl_op0
));
30191 /* return TRUE if x is a reference to a value in a constant pool */
30193 arm_is_constant_pool_ref (rtx x
)
30196 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
30197 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
30200 /* Remember the last target of arm_set_current_function. */
30201 static GTY(()) tree arm_previous_fndecl
;
30203 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30206 save_restore_target_globals (tree new_tree
)
30208 /* If we have a previous state, use it. */
30209 if (TREE_TARGET_GLOBALS (new_tree
))
30210 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
30211 else if (new_tree
== target_option_default_node
)
30212 restore_target_globals (&default_target_globals
);
30215 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30216 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
30219 arm_option_params_internal ();
30222 /* Invalidate arm_previous_fndecl. */
30225 arm_reset_previous_fndecl (void)
30227 arm_previous_fndecl
= NULL_TREE
;
30230 /* Establish appropriate back-end context for processing the function
30231 FNDECL. The argument might be NULL to indicate processing at top
30232 level, outside of any function scope. */
30235 arm_set_current_function (tree fndecl
)
30237 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
30240 tree old_tree
= (arm_previous_fndecl
30241 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
30244 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30246 /* If current function has no attributes but previous one did,
30247 use the default node. */
30248 if (! new_tree
&& old_tree
)
30249 new_tree
= target_option_default_node
;
30251 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30252 the default have been handled by save_restore_target_globals from
30253 arm_pragma_target_parse. */
30254 if (old_tree
== new_tree
)
30257 arm_previous_fndecl
= fndecl
;
30259 /* First set the target options. */
30260 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
30262 save_restore_target_globals (new_tree
);
30265 /* Implement TARGET_OPTION_PRINT. */
30268 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
30270 int flags
= ptr
->x_target_flags
;
30271 const char *fpu_name
;
30273 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
30274 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
30276 fprintf (file
, "%*sselected arch %s\n", indent
, "",
30277 TARGET_THUMB2_P (flags
) ? "thumb2" :
30278 TARGET_THUMB_P (flags
) ? "thumb1" :
30281 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
30284 /* Hook to determine if one function can safely inline another. */
30287 arm_can_inline_p (tree caller
, tree callee
)
30289 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30290 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30291 bool can_inline
= true;
30293 struct cl_target_option
*caller_opts
30294 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30295 : target_option_default_node
);
30297 struct cl_target_option
*callee_opts
30298 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30299 : target_option_default_node
);
30301 if (callee_opts
== caller_opts
)
30304 /* Callee's ISA features should be a subset of the caller's. */
30305 struct arm_build_target caller_target
;
30306 struct arm_build_target callee_target
;
30307 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
30308 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
30310 arm_configure_build_target (&caller_target
, caller_opts
, &global_options_set
,
30312 arm_configure_build_target (&callee_target
, callee_opts
, &global_options_set
,
30314 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
30315 can_inline
= false;
30317 sbitmap_free (caller_target
.isa
);
30318 sbitmap_free (callee_target
.isa
);
30320 /* OK to inline between different modes.
30321 Function with mode specific instructions, e.g using asm,
30322 must be explicitly protected with noinline. */
30326 /* Hook to fix function's alignment affected by target attribute. */
30329 arm_relayout_function (tree fndecl
)
30331 if (DECL_USER_ALIGN (fndecl
))
30334 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30337 callee_tree
= target_option_default_node
;
30339 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30342 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
30345 /* Inner function to process the attribute((target(...))), take an argument and
30346 set the current options from the argument. If we have a list, recursively
30347 go over the list. */
30350 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
30352 if (TREE_CODE (args
) == TREE_LIST
)
30356 for (; args
; args
= TREE_CHAIN (args
))
30357 if (TREE_VALUE (args
)
30358 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30363 else if (TREE_CODE (args
) != STRING_CST
)
30365 error ("attribute %<target%> argument not a string");
30369 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30372 while ((q
= strtok (argstr
, ",")) != NULL
)
30374 while (ISSPACE (*q
)) ++q
;
30377 if (!strncmp (q
, "thumb", 5))
30378 opts
->x_target_flags
|= MASK_THUMB
;
30380 else if (!strncmp (q
, "arm", 3))
30381 opts
->x_target_flags
&= ~MASK_THUMB
;
30383 else if (!strncmp (q
, "fpu=", 4))
30386 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
30387 &fpu_index
, CL_TARGET
))
30389 error ("invalid fpu for attribute(target(\"%s\"))", q
);
30392 if (fpu_index
== TARGET_FPU_auto
)
30394 /* This doesn't really make sense until we support
30395 general dynamic selection of the architecture and all
30397 sorry ("auto fpu selection not currently permitted here");
30400 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
30404 error ("attribute(target(\"%s\")) is unknown", q
);
30412 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30415 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
30416 struct gcc_options
*opts_set
)
30418 struct cl_target_option cl_opts
;
30420 if (!arm_valid_target_attribute_rec (args
, opts
))
30423 cl_target_option_save (&cl_opts
, opts
);
30424 arm_configure_build_target (&arm_active_target
, &cl_opts
, opts_set
, false);
30425 arm_option_check_internal (opts
);
30426 /* Do any overrides, such as global options arch=xxx. */
30427 arm_option_override_internal (opts
, opts_set
);
30429 return build_target_option_node (opts
);
30433 add_attribute (const char * mode
, tree
*attributes
)
30435 size_t len
= strlen (mode
);
30436 tree value
= build_string (len
, mode
);
30438 TREE_TYPE (value
) = build_array_type (char_type_node
,
30439 build_index_type (size_int (len
)));
30441 *attributes
= tree_cons (get_identifier ("target"),
30442 build_tree_list (NULL_TREE
, value
),
30446 /* For testing. Insert thumb or arm modes alternatively on functions. */
30449 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30453 if (! TARGET_FLIP_THUMB
)
30456 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30457 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30460 /* Nested definitions must inherit mode. */
30461 if (current_function_decl
)
30463 mode
= TARGET_THUMB
? "thumb" : "arm";
30464 add_attribute (mode
, attributes
);
30468 /* If there is already a setting don't change it. */
30469 if (lookup_attribute ("target", *attributes
) != NULL
)
30472 mode
= thumb_flipper
? "thumb" : "arm";
30473 add_attribute (mode
, attributes
);
30475 thumb_flipper
= !thumb_flipper
;
30478 /* Hook to validate attribute((target("string"))). */
30481 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30482 tree args
, int ARG_UNUSED (flags
))
30485 struct gcc_options func_options
;
30486 tree cur_tree
, new_optimize
;
30487 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
30489 /* Get the optimization options of the current function. */
30490 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
30492 /* If the function changed the optimization levels as well as setting target
30493 options, start with the optimizations specified. */
30494 if (!func_optimize
)
30495 func_optimize
= optimization_default_node
;
30497 /* Init func_options. */
30498 memset (&func_options
, 0, sizeof (func_options
));
30499 init_options_struct (&func_options
, NULL
);
30500 lang_hooks
.init_options_struct (&func_options
);
30502 /* Initialize func_options to the defaults. */
30503 cl_optimization_restore (&func_options
,
30504 TREE_OPTIMIZATION (func_optimize
));
30506 cl_target_option_restore (&func_options
,
30507 TREE_TARGET_OPTION (target_option_default_node
));
30509 /* Set func_options flags with new target mode. */
30510 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
30511 &global_options_set
);
30513 if (cur_tree
== NULL_TREE
)
30516 new_optimize
= build_optimization_node (&func_options
);
30518 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30520 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30522 finalize_options_struct (&func_options
);
30527 /* Match an ISA feature bitmap to a named FPU. We always use the
30528 first entry that exactly matches the feature set, so that we
30529 effectively canonicalize the FPU name for the assembler. */
30531 arm_identify_fpu_from_isa (sbitmap isa
)
30533 auto_sbitmap
fpubits (isa_num_bits
);
30534 auto_sbitmap
cand_fpubits (isa_num_bits
);
30536 bitmap_and (fpubits
, isa
, isa_all_fpubits
);
30538 /* If there are no ISA feature bits relating to the FPU, we must be
30539 doing soft-float. */
30540 if (bitmap_empty_p (fpubits
))
30543 for (unsigned int i
= 0; i
< ARRAY_SIZE (all_fpus
); i
++)
30545 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
30546 if (bitmap_equal_p (fpubits
, cand_fpubits
))
30547 return all_fpus
[i
].name
;
30549 /* We must find an entry, or things have gone wrong. */
30550 gcc_unreachable ();
30554 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
30557 fprintf (stream
, "\t.syntax unified\n");
30561 if (is_called_in_ARM_mode (decl
)
30562 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
30563 && cfun
->is_thunk
))
30564 fprintf (stream
, "\t.code 32\n");
30565 else if (TARGET_THUMB1
)
30566 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
30568 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
30571 fprintf (stream
, "\t.arm\n");
30573 asm_fprintf (asm_out_file
, "\t.fpu %s\n",
30576 : arm_identify_fpu_from_isa (arm_active_target
.isa
)));
30578 if (TARGET_POKE_FUNCTION_NAME
)
30579 arm_poke_function_name (stream
, (const char *) name
);
30582 /* If MEM is in the form of [base+offset], extract the two parts
30583 of address and set to BASE and OFFSET, otherwise return false
30584 after clearing BASE and OFFSET. */
30587 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
30591 gcc_assert (MEM_P (mem
));
30593 addr
= XEXP (mem
, 0);
30595 /* Strip off const from addresses like (const (addr)). */
30596 if (GET_CODE (addr
) == CONST
)
30597 addr
= XEXP (addr
, 0);
30599 if (GET_CODE (addr
) == REG
)
30602 *offset
= const0_rtx
;
30606 if (GET_CODE (addr
) == PLUS
30607 && GET_CODE (XEXP (addr
, 0)) == REG
30608 && CONST_INT_P (XEXP (addr
, 1)))
30610 *base
= XEXP (addr
, 0);
30611 *offset
= XEXP (addr
, 1);
30616 *offset
= NULL_RTX
;
30621 /* If INSN is a load or store of address in the form of [base+offset],
30622 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30623 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30624 otherwise return FALSE. */
30627 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
30631 gcc_assert (INSN_P (insn
));
30632 x
= PATTERN (insn
);
30633 if (GET_CODE (x
) != SET
)
30637 dest
= SET_DEST (x
);
30638 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
30641 extract_base_offset_in_addr (dest
, base
, offset
);
30643 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
30646 extract_base_offset_in_addr (src
, base
, offset
);
30651 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
30654 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30656 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30657 and PRI are only calculated for these instructions. For other instruction,
30658 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30659 instruction fusion can be supported by returning different priorities.
30661 It's important that irrelevant instructions get the largest FUSION_PRI. */
30664 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
30665 int *fusion_pri
, int *pri
)
30671 gcc_assert (INSN_P (insn
));
30674 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
30681 /* Load goes first. */
30683 *fusion_pri
= tmp
- 1;
30685 *fusion_pri
= tmp
- 2;
30689 /* INSN with smaller base register goes first. */
30690 tmp
-= ((REGNO (base
) & 0xff) << 20);
30692 /* INSN with smaller offset goes first. */
30693 off_val
= (int)(INTVAL (offset
));
30695 tmp
-= (off_val
& 0xfffff);
30697 tmp
+= ((- off_val
) & 0xfffff);
30704 /* Construct and return a PARALLEL RTX vector with elements numbering the
30705 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30706 the vector - from the perspective of the architecture. This does not
30707 line up with GCC's perspective on lane numbers, so we end up with
30708 different masks depending on our target endian-ness. The diagram
30709 below may help. We must draw the distinction when building masks
30710 which select one half of the vector. An instruction selecting
30711 architectural low-lanes for a big-endian target, must be described using
30712 a mask selecting GCC high-lanes.
30714 Big-Endian Little-Endian
30716 GCC 0 1 2 3 3 2 1 0
30717 | x | x | x | x | | x | x | x | x |
30718 Architecture 3 2 1 0 3 2 1 0
30720 Low Mask: { 2, 3 } { 0, 1 }
30721 High Mask: { 0, 1 } { 2, 3 }
30725 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
30727 int nunits
= GET_MODE_NUNITS (mode
);
30728 rtvec v
= rtvec_alloc (nunits
/ 2);
30729 int high_base
= nunits
/ 2;
30735 if (BYTES_BIG_ENDIAN
)
30736 base
= high
? low_base
: high_base
;
30738 base
= high
? high_base
: low_base
;
30740 for (i
= 0; i
< nunits
/ 2; i
++)
30741 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
30743 t1
= gen_rtx_PARALLEL (mode
, v
);
30747 /* Check OP for validity as a PARALLEL RTX vector with elements
30748 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30749 from the perspective of the architecture. See the diagram above
30750 arm_simd_vect_par_cnst_half_p for more details. */
30753 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
30756 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
30757 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
30758 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
30761 if (!VECTOR_MODE_P (mode
))
30764 if (count_op
!= count_ideal
)
30767 for (i
= 0; i
< count_ideal
; i
++)
30769 rtx elt_op
= XVECEXP (op
, 0, i
);
30770 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
30772 if (!CONST_INT_P (elt_op
)
30773 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
30779 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30782 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
30785 /* For now, we punt and not handle this for TARGET_THUMB1. */
30786 if (vcall_offset
&& TARGET_THUMB1
)
30789 /* Otherwise ok. */
30793 /* Generate RTL for a conditional branch with rtx comparison CODE in
30794 mode CC_MODE. The destination of the unlikely conditional branch
30798 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
30802 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
30803 gen_rtx_REG (cc_mode
, CC_REGNUM
),
30806 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30807 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
30809 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
30812 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30814 For pure-code sections there is no letter code for this attribute, so
30815 output all the section flags numerically when this is needed. */
30818 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
30821 if (flags
& SECTION_ARM_PURECODE
)
30825 if (!(flags
& SECTION_DEBUG
))
30827 if (flags
& SECTION_EXCLUDE
)
30828 *num
|= 0x80000000;
30829 if (flags
& SECTION_WRITE
)
30831 if (flags
& SECTION_CODE
)
30833 if (flags
& SECTION_MERGE
)
30835 if (flags
& SECTION_STRINGS
)
30837 if (flags
& SECTION_TLS
)
30839 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
30848 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30850 If pure-code is passed as an option, make sure all functions are in
30851 sections that have the SHF_ARM_PURECODE attribute. */
30854 arm_function_section (tree decl
, enum node_frequency freq
,
30855 bool startup
, bool exit
)
30857 const char * section_name
;
30860 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
30861 return default_function_section (decl
, freq
, startup
, exit
);
30863 if (!target_pure_code
)
30864 return default_function_section (decl
, freq
, startup
, exit
);
30867 section_name
= DECL_SECTION_NAME (decl
);
30869 /* If a function is not in a named section then it falls under the 'default'
30870 text section, also known as '.text'. We can preserve previous behavior as
30871 the default text section already has the SHF_ARM_PURECODE section
30875 section
*default_sec
= default_function_section (decl
, freq
, startup
,
30878 /* If default_sec is not null, then it must be a special section like for
30879 example .text.startup. We set the pure-code attribute and return the
30880 same section to preserve existing behavior. */
30882 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
30883 return default_sec
;
30886 /* Otherwise look whether a section has already been created with
30888 sec
= get_named_section (decl
, section_name
, 0);
30890 /* If that is not the case passing NULL as the section's name to
30891 'get_named_section' will create a section with the declaration's
30893 sec
= get_named_section (decl
, NULL
, 0);
30895 /* Set the SHF_ARM_PURECODE attribute. */
30896 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
30901 /* Implements the TARGET_SECTION_FLAGS hook.
30903 If DECL is a function declaration and pure-code is passed as an option
30904 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
30905 section's name and RELOC indicates whether the declarations initializer may
30906 contain runtime relocations. */
30908 static unsigned int
30909 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
30911 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
30913 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
30914 flags
|= SECTION_ARM_PURECODE
;
30919 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
30922 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
30924 rtx
*quot_p
, rtx
*rem_p
)
30926 if (mode
== SImode
)
30927 gcc_assert (!TARGET_IDIV
);
30929 machine_mode libval_mode
= smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode
),
30932 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
30934 op0
, GET_MODE (op0
),
30935 op1
, GET_MODE (op1
));
30937 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
30938 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
30939 GET_MODE_SIZE (mode
));
30941 gcc_assert (quotient
);
30942 gcc_assert (remainder
);
30944 *quot_p
= quotient
;
30945 *rem_p
= remainder
;
30948 /* This function checks for the availability of the coprocessor builtin passed
30949 in BUILTIN for the current target. Returns true if it is available and
30950 false otherwise. If a BUILTIN is passed for which this function has not
30951 been implemented it will cause an exception. */
30954 arm_coproc_builtin_available (enum unspecv builtin
)
30956 /* None of these builtins are available in Thumb mode if the target only
30957 supports Thumb-1. */
30975 case VUNSPEC_LDC2L
:
30977 case VUNSPEC_STC2L
:
30980 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
30987 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
30989 if (arm_arch6
|| arm_arch5te
)
30992 case VUNSPEC_MCRR2
:
30993 case VUNSPEC_MRRC2
:
30998 gcc_unreachable ();
31003 /* This function returns true if OP is a valid memory operand for the ldc and
31004 stc coprocessor instructions and false otherwise. */
31007 arm_coproc_ldc_stc_legitimate_address (rtx op
)
31009 HOST_WIDE_INT range
;
31010 /* Has to be a memory operand. */
31016 /* We accept registers. */
31020 switch GET_CODE (op
)
31024 /* Or registers with an offset. */
31025 if (!REG_P (XEXP (op
, 0)))
31030 /* The offset must be an immediate though. */
31031 if (!CONST_INT_P (op
))
31034 range
= INTVAL (op
);
31036 /* Within the range of [-1020,1020]. */
31037 if (!IN_RANGE (range
, -1020, 1020))
31040 /* And a multiple of 4. */
31041 return (range
% 4) == 0;
31047 return REG_P (XEXP (op
, 0));
31049 gcc_unreachable ();
31053 #include "gt-arm.h"