1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
34 #include "stringpool.h"
40 #include "diagnostic-core.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
61 #include "target-globals.h"
63 #include "tm-constrs.h"
65 #include "optabs-libfuncs.h"
68 /* This file should be included last. */
69 #include "target-def.h"
/* Forward definitions of types.  */

/* Nodes and fixups for the minipool (constant pool placed in the
   instruction stream) machinery; the structs themselves are defined
   later in this file.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook for the language front end to emit language-specific object
   attributes.  NOTE(review): semantics inferred from the name only;
   confirm against the front ends that set it.  */
void (*arm_lang_output_object_attributes_hook)(void);
82 /* Forward function declarations. */
83 static bool arm_const_not_ok_for_debug_p (rtx
);
84 static bool arm_needs_doubleword_align (machine_mode
, const_tree
);
85 static int arm_compute_static_chain_stack_bytes (void);
86 static arm_stack_offsets
*arm_get_frame_offsets (void);
87 static void arm_add_gc_roots (void);
88 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
89 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
90 static unsigned bit_count (unsigned long);
91 static unsigned bitmap_popcount (const sbitmap
);
92 static int arm_address_register_rtx_p (rtx
, int);
93 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
94 static bool is_called_in_ARM_mode (tree
);
95 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
96 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
97 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
98 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
99 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
100 inline static int thumb1_index_register_rtx_p (rtx
, int);
101 static int thumb_far_jump_used_p (void);
102 static bool thumb_force_lr_save (void);
103 static unsigned arm_size_return_regs (void);
104 static bool arm_assemble_integer (rtx
, unsigned int, int);
105 static void arm_print_operand (FILE *, rtx
, int);
106 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
107 static bool arm_print_operand_punct_valid_p (unsigned char code
);
108 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
109 static arm_cc
get_arm_condition_code (rtx
);
110 static const char *output_multi_immediate (rtx
*, const char *, const char *,
112 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
113 static struct machine_function
*arm_init_machine_status (void);
114 static void thumb_exit (FILE *, int);
115 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
116 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
117 static Mnode
*add_minipool_forward_ref (Mfix
*);
118 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
119 static Mnode
*add_minipool_backward_ref (Mfix
*);
120 static void assign_minipool_offsets (Mfix
*);
121 static void arm_print_value (FILE *, rtx
);
122 static void dump_minipool (rtx_insn
*);
123 static int arm_barrier_cost (rtx_insn
*);
124 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
125 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
126 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
128 static void arm_reorg (void);
129 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
130 static unsigned long arm_compute_save_reg0_reg12_mask (void);
131 static unsigned long arm_compute_save_reg_mask (void);
132 static unsigned long arm_isr_value (tree
);
133 static unsigned long arm_compute_func_type (void);
134 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
135 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
136 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
137 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
138 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
140 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
141 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
142 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
143 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
144 static int arm_comp_type_attributes (const_tree
, const_tree
);
145 static void arm_set_default_type_attributes (tree
);
146 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
147 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
148 static int optimal_immediate_sequence (enum rtx_code code
,
149 unsigned HOST_WIDE_INT val
,
150 struct four_ints
*return_sequence
);
151 static int optimal_immediate_sequence_1 (enum rtx_code code
,
152 unsigned HOST_WIDE_INT val
,
153 struct four_ints
*return_sequence
,
155 static int arm_get_strip_length (int);
156 static bool arm_function_ok_for_sibcall (tree
, tree
);
157 static machine_mode
arm_promote_function_mode (const_tree
,
160 static bool arm_return_in_memory (const_tree
, const_tree
);
161 static rtx
arm_function_value (const_tree
, const_tree
, bool);
162 static rtx
arm_libcall_value_1 (machine_mode
);
163 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
164 static bool arm_function_value_regno_p (const unsigned int);
165 static void arm_internal_label (FILE *, const char *, unsigned long);
166 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
168 static bool arm_have_conditional_execution (void);
169 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
170 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
171 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
172 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
173 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
174 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
175 static void emit_constant_insn (rtx cond
, rtx pattern
);
176 static rtx_insn
*emit_set_insn (rtx
, rtx
);
177 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
178 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
180 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
182 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
184 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
185 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
187 static rtx
aapcs_libcall_value (machine_mode
);
188 static int aapcs_select_return_coproc (const_tree
, const_tree
);
190 #ifdef OBJECT_FORMAT_ELF
191 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
192 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
195 static void arm_encode_section_info (tree
, rtx
, int);
198 static void arm_file_end (void);
199 static void arm_file_start (void);
200 static void arm_insert_attributes (tree
, tree
*);
202 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
204 static bool arm_pass_by_reference (cumulative_args_t
,
205 machine_mode
, const_tree
, bool);
206 static bool arm_promote_prototypes (const_tree
);
207 static bool arm_default_short_enums (void);
208 static bool arm_align_anon_bitfield (void);
209 static bool arm_return_in_msb (const_tree
);
210 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
211 static bool arm_return_in_memory (const_tree
, const_tree
);
213 static void arm_unwind_emit (FILE *, rtx_insn
*);
214 static bool arm_output_ttype (rtx
);
215 static void arm_asm_emit_except_personality (rtx
);
217 static void arm_asm_init_sections (void);
218 static rtx
arm_dwarf_register_span (rtx
);
220 static tree
arm_cxx_guard_type (void);
221 static bool arm_cxx_guard_mask_bit (void);
222 static tree
arm_get_cookie_size (tree
);
223 static bool arm_cookie_has_size (void);
224 static bool arm_cxx_cdtor_returns_this (void);
225 static bool arm_cxx_key_method_may_be_inline (void);
226 static void arm_cxx_determine_class_data_visibility (tree
);
227 static bool arm_cxx_class_data_always_comdat (void);
228 static bool arm_cxx_use_aeabi_atexit (void);
229 static void arm_init_libfuncs (void);
230 static tree
arm_build_builtin_va_list (void);
231 static void arm_expand_builtin_va_start (tree
, rtx
);
232 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
233 static void arm_option_override (void);
234 static void arm_option_restore (struct gcc_options
*,
235 struct cl_target_option
*);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option
*);
238 static void arm_set_current_function (tree
);
239 static bool arm_can_inline_p (tree
, tree
);
240 static void arm_relayout_function (tree
);
241 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
242 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
243 static bool arm_sched_can_speculate_insn (rtx_insn
*);
244 static bool arm_macro_fusion_p (void);
245 static bool arm_cannot_copy_insn_p (rtx_insn
*);
246 static int arm_issue_rate (void);
247 static int arm_first_cycle_multipass_dfa_lookahead (void);
248 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
249 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
250 static bool arm_output_addr_const_extra (FILE *, rtx
);
251 static bool arm_allocate_stack_slots_for_args (void);
252 static bool arm_warn_func_return (tree
);
253 static tree
arm_promoted_type (const_tree t
);
254 static bool arm_scalar_mode_supported_p (machine_mode
);
255 static bool arm_frame_pointer_required (void);
256 static bool arm_can_eliminate (const int, const int);
257 static void arm_asm_trampoline_template (FILE *);
258 static void arm_trampoline_init (rtx
, tree
, rtx
);
259 static rtx
arm_trampoline_adjust_address (rtx
);
260 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
261 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
262 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
263 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
264 static bool arm_array_mode_supported_p (machine_mode
,
265 unsigned HOST_WIDE_INT
);
266 static machine_mode
arm_preferred_simd_mode (machine_mode
);
267 static bool arm_class_likely_spilled_p (reg_class_t
);
268 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
269 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
270 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
274 static void arm_conditional_register_usage (void);
275 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
276 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
277 static unsigned int arm_autovectorize_vector_sizes (void);
278 static int arm_default_branch_cost (bool, bool);
279 static int arm_cortex_a5_branch_cost (bool, bool);
280 static int arm_cortex_m_branch_cost (bool, bool);
281 static int arm_cortex_m7_branch_cost (bool, bool);
283 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
284 const unsigned char *sel
);
286 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
288 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
290 int misalign ATTRIBUTE_UNUSED
);
291 static unsigned arm_add_stmt_cost (void *data
, int count
,
292 enum vect_cost_for_stmt kind
,
293 struct _stmt_vec_info
*stmt_info
,
295 enum vect_cost_model_location where
);
297 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
298 bool op0_preserve_value
);
299 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
301 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
302 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
304 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
305 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
306 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
308 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
309 static machine_mode
arm_floatn_mode (int, bool);
311 /* Table of machine attributes. */
/* NOTE(review): this span is a damaged listing -- the table's opening and
   closing braces, an "#ifdef ARM_PE"/"#endif" pair around the PE-only
   entries, and several ", false }" field continuations are not visible
   here.  Reconstruct against upstream gcc/config/arm/arm.c before
   attempting to compile this region.  */
312 static const struct attribute_spec arm_attribute_table
[] =
314 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
315 affects_type_identity } */
316 /* Function calls made to this symbol must be done indirectly, because
317 it may lie outside of the 26 bit addressing range of a normal function
319 { "long_call", 0, 0, false, true, true, NULL
, false },
320 /* Whereas these functions are always known to reside within the 26 bit
322 { "short_call", 0, 0, false, true, true, NULL
, false },
323 /* Specify the procedure call conventions for a function. */
324 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
326 /* Interrupt Service Routines have special prologue and epilogue requirements. */
327 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
329 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
331 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
334 /* ARM/PE has three new attributes:
336 dllexport - for exporting a function/variable that will live in a dll
337 dllimport - for importing a function/variable from a dll
339 Microsoft allows multiple declspecs in one __declspec, separating
340 them with spaces. We do NOT support this. Instead, use __declspec
343 { "dllimport", 0, 0, true, false, false, NULL
, false },
344 { "dllexport", 0, 0, true, false, false, NULL
, false },
345 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
347 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
348 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
349 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
350 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
353 /* ARMv8-M Security Extensions support. */
354 { "cmse_nonsecure_entry", 0, 0, true, false, false,
355 arm_handle_cmse_nonsecure_entry
, false },
356 { "cmse_nonsecure_call", 0, 0, true, false, false,
357 arm_handle_cmse_nonsecure_call
, true },
/* Sentinel entry terminating the attribute table.  */
358 { NULL
, 0, 0, false, false, false, NULL
, false }
361 /* Initialize the GCC target structure. */
362 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
363 #undef TARGET_MERGE_DECL_ATTRIBUTES
364 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
367 #undef TARGET_LEGITIMIZE_ADDRESS
368 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
373 #undef TARGET_INSERT_ATTRIBUTES
374 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
376 #undef TARGET_ASM_FILE_START
377 #define TARGET_ASM_FILE_START arm_file_start
378 #undef TARGET_ASM_FILE_END
379 #define TARGET_ASM_FILE_END arm_file_end
381 #undef TARGET_ASM_ALIGNED_SI_OP
382 #define TARGET_ASM_ALIGNED_SI_OP NULL
383 #undef TARGET_ASM_INTEGER
384 #define TARGET_ASM_INTEGER arm_assemble_integer
386 #undef TARGET_PRINT_OPERAND
387 #define TARGET_PRINT_OPERAND arm_print_operand
388 #undef TARGET_PRINT_OPERAND_ADDRESS
389 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
390 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
391 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
393 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
394 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
396 #undef TARGET_ASM_FUNCTION_PROLOGUE
397 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
399 #undef TARGET_ASM_FUNCTION_EPILOGUE
400 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
402 #undef TARGET_CAN_INLINE_P
403 #define TARGET_CAN_INLINE_P arm_can_inline_p
405 #undef TARGET_RELAYOUT_FUNCTION
406 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
408 #undef TARGET_OPTION_OVERRIDE
409 #define TARGET_OPTION_OVERRIDE arm_option_override
411 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
412 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
414 #undef TARGET_OPTION_RESTORE
415 #define TARGET_OPTION_RESTORE arm_option_restore
417 #undef TARGET_OPTION_PRINT
418 #define TARGET_OPTION_PRINT arm_option_print
420 #undef TARGET_COMP_TYPE_ATTRIBUTES
421 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
423 #undef TARGET_SCHED_CAN_SPECULATE_INSN
424 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
426 #undef TARGET_SCHED_MACRO_FUSION_P
427 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
429 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
430 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
432 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
433 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
435 #undef TARGET_SCHED_ADJUST_COST
436 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
438 #undef TARGET_SET_CURRENT_FUNCTION
439 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
441 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
442 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
444 #undef TARGET_SCHED_REORDER
445 #define TARGET_SCHED_REORDER arm_sched_reorder
447 #undef TARGET_REGISTER_MOVE_COST
448 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
450 #undef TARGET_MEMORY_MOVE_COST
451 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
453 #undef TARGET_ENCODE_SECTION_INFO
455 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
457 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
460 #undef TARGET_STRIP_NAME_ENCODING
461 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
463 #undef TARGET_ASM_INTERNAL_LABEL
464 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
466 #undef TARGET_FLOATN_MODE
467 #define TARGET_FLOATN_MODE arm_floatn_mode
469 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
470 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
472 #undef TARGET_FUNCTION_VALUE
473 #define TARGET_FUNCTION_VALUE arm_function_value
475 #undef TARGET_LIBCALL_VALUE
476 #define TARGET_LIBCALL_VALUE arm_libcall_value
478 #undef TARGET_FUNCTION_VALUE_REGNO_P
479 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
481 #undef TARGET_ASM_OUTPUT_MI_THUNK
482 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
483 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
484 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
486 #undef TARGET_RTX_COSTS
487 #define TARGET_RTX_COSTS arm_rtx_costs
488 #undef TARGET_ADDRESS_COST
489 #define TARGET_ADDRESS_COST arm_address_cost
491 #undef TARGET_SHIFT_TRUNCATION_MASK
492 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
493 #undef TARGET_VECTOR_MODE_SUPPORTED_P
494 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
495 #undef TARGET_ARRAY_MODE_SUPPORTED_P
496 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
497 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
498 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
499 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
500 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
501 arm_autovectorize_vector_sizes
503 #undef TARGET_MACHINE_DEPENDENT_REORG
504 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
506 #undef TARGET_INIT_BUILTINS
507 #define TARGET_INIT_BUILTINS arm_init_builtins
508 #undef TARGET_EXPAND_BUILTIN
509 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
510 #undef TARGET_BUILTIN_DECL
511 #define TARGET_BUILTIN_DECL arm_builtin_decl
513 #undef TARGET_INIT_LIBFUNCS
514 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
516 #undef TARGET_PROMOTE_FUNCTION_MODE
517 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
518 #undef TARGET_PROMOTE_PROTOTYPES
519 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
520 #undef TARGET_PASS_BY_REFERENCE
521 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
522 #undef TARGET_ARG_PARTIAL_BYTES
523 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
524 #undef TARGET_FUNCTION_ARG
525 #define TARGET_FUNCTION_ARG arm_function_arg
526 #undef TARGET_FUNCTION_ARG_ADVANCE
527 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
528 #undef TARGET_FUNCTION_ARG_BOUNDARY
529 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
531 #undef TARGET_SETUP_INCOMING_VARARGS
532 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
534 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
535 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
537 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
538 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
539 #undef TARGET_TRAMPOLINE_INIT
540 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
541 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
542 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
544 #undef TARGET_WARN_FUNC_RETURN
545 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
547 #undef TARGET_DEFAULT_SHORT_ENUMS
548 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
550 #undef TARGET_ALIGN_ANON_BITFIELD
551 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
553 #undef TARGET_NARROW_VOLATILE_BITFIELD
554 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
556 #undef TARGET_CXX_GUARD_TYPE
557 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
559 #undef TARGET_CXX_GUARD_MASK_BIT
560 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
562 #undef TARGET_CXX_GET_COOKIE_SIZE
563 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
565 #undef TARGET_CXX_COOKIE_HAS_SIZE
566 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
568 #undef TARGET_CXX_CDTOR_RETURNS_THIS
569 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
571 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
572 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
574 #undef TARGET_CXX_USE_AEABI_ATEXIT
575 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
577 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
578 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
579 arm_cxx_determine_class_data_visibility
581 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
582 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
584 #undef TARGET_RETURN_IN_MSB
585 #define TARGET_RETURN_IN_MSB arm_return_in_msb
587 #undef TARGET_RETURN_IN_MEMORY
588 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
590 #undef TARGET_MUST_PASS_IN_STACK
591 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
594 #undef TARGET_ASM_UNWIND_EMIT
595 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
597 /* EABI unwinding tables use a different format for the typeinfo tables. */
598 #undef TARGET_ASM_TTYPE
599 #define TARGET_ASM_TTYPE arm_output_ttype
601 #undef TARGET_ARM_EABI_UNWINDER
602 #define TARGET_ARM_EABI_UNWINDER true
604 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
605 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
607 #endif /* ARM_UNWIND_INFO */
609 #undef TARGET_ASM_INIT_SECTIONS
610 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
612 #undef TARGET_DWARF_REGISTER_SPAN
613 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
615 #undef TARGET_CANNOT_COPY_INSN_P
616 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
619 #undef TARGET_HAVE_TLS
620 #define TARGET_HAVE_TLS true
623 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
624 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
626 #undef TARGET_LEGITIMATE_CONSTANT_P
627 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
629 #undef TARGET_CANNOT_FORCE_CONST_MEM
630 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
632 #undef TARGET_MAX_ANCHOR_OFFSET
633 #define TARGET_MAX_ANCHOR_OFFSET 4095
635 /* The minimum is set such that the total size of the block
636 for a particular anchor is -4088 + 1 + 4095 bytes, which is
637 divisible by eight, ensuring natural spacing of anchors. */
638 #undef TARGET_MIN_ANCHOR_OFFSET
639 #define TARGET_MIN_ANCHOR_OFFSET -4088
641 #undef TARGET_SCHED_ISSUE_RATE
642 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
644 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
645 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
646 arm_first_cycle_multipass_dfa_lookahead
648 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
649 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
650 arm_first_cycle_multipass_dfa_lookahead_guard
652 #undef TARGET_MANGLE_TYPE
653 #define TARGET_MANGLE_TYPE arm_mangle_type
655 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
656 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
658 #undef TARGET_BUILD_BUILTIN_VA_LIST
659 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
660 #undef TARGET_EXPAND_BUILTIN_VA_START
661 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
662 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
663 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
666 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
667 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
670 #undef TARGET_LEGITIMATE_ADDRESS_P
671 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
673 #undef TARGET_PREFERRED_RELOAD_CLASS
674 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
676 #undef TARGET_PROMOTED_TYPE
677 #define TARGET_PROMOTED_TYPE arm_promoted_type
679 #undef TARGET_SCALAR_MODE_SUPPORTED_P
680 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
682 #undef TARGET_FRAME_POINTER_REQUIRED
683 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
685 #undef TARGET_CAN_ELIMINATE
686 #define TARGET_CAN_ELIMINATE arm_can_eliminate
688 #undef TARGET_CONDITIONAL_REGISTER_USAGE
689 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
691 #undef TARGET_CLASS_LIKELY_SPILLED_P
692 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
694 #undef TARGET_VECTORIZE_BUILTINS
695 #define TARGET_VECTORIZE_BUILTINS
697 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
698 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
699 arm_builtin_vectorized_function
701 #undef TARGET_VECTOR_ALIGNMENT
702 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
704 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
705 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
706 arm_vector_alignment_reachable
708 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
709 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
710 arm_builtin_support_vector_misalignment
712 #undef TARGET_PREFERRED_RENAME_CLASS
713 #define TARGET_PREFERRED_RENAME_CLASS \
714 arm_preferred_rename_class
716 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
717 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
718 arm_vectorize_vec_perm_const_ok
720 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
721 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
722 arm_builtin_vectorization_cost
723 #undef TARGET_VECTORIZE_ADD_STMT_COST
724 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
726 #undef TARGET_CANONICALIZE_COMPARISON
727 #define TARGET_CANONICALIZE_COMPARISON \
728 arm_canonicalize_comparison
730 #undef TARGET_ASAN_SHADOW_OFFSET
731 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
733 #undef MAX_INSN_PER_IT_BLOCK
734 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
736 #undef TARGET_CAN_USE_DOLOOP_P
737 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
739 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
740 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
742 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
743 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
745 #undef TARGET_SCHED_FUSION_PRIORITY
746 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
748 #undef TARGET_ASM_FUNCTION_SECTION
749 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
751 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
752 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
754 #undef TARGET_SECTION_TYPE_FLAGS
755 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
757 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
758 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
760 #undef TARGET_C_EXCESS_PRECISION
761 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
763 /* Although the architecture reserves bits 0 and 1, only the former is
764 used for ARM/Thumb ISA selection in v7 and earlier versions. */
765 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
766 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
768 struct gcc_target targetm
= TARGET_INITIALIZER
;
770 /* Obstack for minipool constant handling. */
771 static struct obstack minipool_obstack
;
772 static char * minipool_startobj
;
774 /* The maximum number of insns skipped which
775 will be conditionalised if possible. */
776 static int max_insns_skipped
= 5;
778 extern FILE * asm_out_file
;
780 /* True if we are currently building a constant table. */
781 int making_const_table
;
783 /* The processor for which instructions should be scheduled. */
784 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
786 /* The current tuning set. */
787 const struct tune_params
*current_tune
;
789 /* Which floating point hardware to schedule for. */
792 /* Used for Thumb call_via trampolines. */
793 rtx thumb_call_via_label
[14];
794 static int thumb_call_reg_needed
;
796 /* The bits in this mask specify which instruction scheduling options should
798 unsigned int tune_flags
= 0;
800 /* The highest ARM architecture version supported by the
802 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
804 /* Active target architecture and tuning. */
806 struct arm_build_target arm_active_target
;
808 /* The following are used in the arm.md file as equivalents to bits
809 in the above two flag variables. */
811 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
814 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
817 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
820 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
823 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
826 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
829 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
832 /* Nonzero if this chip supports the ARM 6K extensions. */
835 /* Nonzero if this chip supports the ARM 6KZ extensions. */
838 /* Nonzero if instructions present in ARMv6-M can be used. */
841 /* Nonzero if this chip supports the ARM 7 extensions. */
844 /* Nonzero if this chip supports the ARM 7ve extensions. */
847 /* Nonzero if instructions not present in the 'M' profile can be used. */
848 int arm_arch_notm
= 0;
/* Nonzero if instructions present in ARMv7E-M can be used.  */
/* Nonzero if instructions present in ARMv8 can be used.  */
/* Nonzero if this chip supports the ARMv8.1 extensions.  */
/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 1.  */
/* Nonzero if chip supports Thumb 2.  */
904 /* Nonzero if chip supports integer division instruction. */
905 int arm_arch_arm_hwdiv
;
906 int arm_arch_thumb_hwdiv
;
908 /* Nonzero if chip disallows volatile memory access in IT block. */
909 int arm_arch_no_volatile_ce
;
911 /* Nonzero if we should use Neon to handle 64-bits operations rather
912 than core registers. */
913 int prefer_neon_for_64bits
= 0;
915 /* Nonzero if we shouldn't use literal pools. */
916 bool arm_disable_literal_pool
= false;
918 /* The register number to be used for the PIC offset register. */
919 unsigned arm_pic_register
= INVALID_REGNUM
;
921 enum arm_pcs arm_pcs_default
;
923 /* For an explanation of these variables, see final_prescan_insn below. */
925 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
926 enum arm_cond_code arm_current_cc
;
929 int arm_target_label
;
930 /* The number of conditionally executed insns, including the current insn. */
931 int arm_condexec_count
= 0;
932 /* A bitmask specifying the patterns for the IT block.
933 Zero means do not output an IT block before this insn. */
934 int arm_condexec_mask
= 0;
935 /* The number of bits used in arm_condexec_mask. */
936 int arm_condexec_masklen
= 0;
938 /* Nonzero if chip supports the ARMv8 CRC instructions. */
939 int arm_arch_crc
= 0;
941 /* Nonzero if chip supports the ARMv8-M security extensions. */
942 int arm_arch_cmse
= 0;
944 /* Nonzero if the core has a very small, high-latency, multiply unit. */
945 int arm_m_profile_small_mul
= 0;
/* The condition codes of the ARM, and the inverse function.  Indexed by
   enum arm_cond_code; "al" (always) and "nv" (never) close the table.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/* Assembler mnemonic for a logical-shift-left operand modifier.  */
#define ARM_LSL_NAME "lsl"
/* True iff the two NUL-terminated strings compare equal.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Mask of the low registers usable as scratch registers in Thumb-2
   prologue/epilogue code: r0-r7 minus the frame pointer, SP, PC and
   the PIC register.  */
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
			   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
			   | (1 << PIC_OFFSET_TABLE_REGNUM)))
967 /* Initialization code. */
971 const char *const name
;
972 enum processor_type core
;
973 unsigned int tune_flags
;
975 enum base_architecture base_arch
;
976 enum isa_feature isa_bits
[isa_num_bits
];
977 const struct tune_params
*const tune
;
/* Initializers for tune_params::prefetch: { num_slots, l1_cache_size,
   l1_cache_line_size }, with -1 meaning "unknown / not beneficial".  */
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
/* NOTE(review): the continuation lines of this macro were lost in
   extraction; reconstructed as the plain three-field initializer --
   confirm against upstream.  */
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  {				\
    num_slots,			\
    l1_size,			\
    l1_line_size		\
  }
989 /* arm generic vectorizer costs. */
991 struct cpu_vec_costs arm_default_vec_cost
= {
992 1, /* scalar_stmt_cost. */
993 1, /* scalar load_cost. */
994 1, /* scalar_store_cost. */
995 1, /* vec_stmt_cost. */
996 1, /* vec_to_scalar_cost. */
997 1, /* scalar_to_vec_cost. */
998 1, /* vec_align_load_cost. */
999 1, /* vec_unalign_load_cost. */
1000 1, /* vec_unalign_store_cost. */
1001 1, /* vec_store_cost. */
1002 3, /* cond_taken_branch_cost. */
1003 1, /* cond_not_taken_branch_cost. */
/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
1007 #include "aarch-cost-tables.h"
1011 const struct cpu_cost_table cortexa9_extra_costs
=
1018 COSTS_N_INSNS (1), /* shift_reg. */
1019 COSTS_N_INSNS (1), /* arith_shift. */
1020 COSTS_N_INSNS (2), /* arith_shift_reg. */
1022 COSTS_N_INSNS (1), /* log_shift_reg. */
1023 COSTS_N_INSNS (1), /* extend. */
1024 COSTS_N_INSNS (2), /* extend_arith. */
1025 COSTS_N_INSNS (1), /* bfi. */
1026 COSTS_N_INSNS (1), /* bfx. */
1030 true /* non_exec_costs_exec. */
1035 COSTS_N_INSNS (3), /* simple. */
1036 COSTS_N_INSNS (3), /* flag_setting. */
1037 COSTS_N_INSNS (2), /* extend. */
1038 COSTS_N_INSNS (3), /* add. */
1039 COSTS_N_INSNS (2), /* extend_add. */
1040 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1044 0, /* simple (N/A). */
1045 0, /* flag_setting (N/A). */
1046 COSTS_N_INSNS (4), /* extend. */
1048 COSTS_N_INSNS (4), /* extend_add. */
1054 COSTS_N_INSNS (2), /* load. */
1055 COSTS_N_INSNS (2), /* load_sign_extend. */
1056 COSTS_N_INSNS (2), /* ldrd. */
1057 COSTS_N_INSNS (2), /* ldm_1st. */
1058 1, /* ldm_regs_per_insn_1st. */
1059 2, /* ldm_regs_per_insn_subsequent. */
1060 COSTS_N_INSNS (5), /* loadf. */
1061 COSTS_N_INSNS (5), /* loadd. */
1062 COSTS_N_INSNS (1), /* load_unaligned. */
1063 COSTS_N_INSNS (2), /* store. */
1064 COSTS_N_INSNS (2), /* strd. */
1065 COSTS_N_INSNS (2), /* stm_1st. */
1066 1, /* stm_regs_per_insn_1st. */
1067 2, /* stm_regs_per_insn_subsequent. */
1068 COSTS_N_INSNS (1), /* storef. */
1069 COSTS_N_INSNS (1), /* stored. */
1070 COSTS_N_INSNS (1), /* store_unaligned. */
1071 COSTS_N_INSNS (1), /* loadv. */
1072 COSTS_N_INSNS (1) /* storev. */
1077 COSTS_N_INSNS (14), /* div. */
1078 COSTS_N_INSNS (4), /* mult. */
1079 COSTS_N_INSNS (7), /* mult_addsub. */
1080 COSTS_N_INSNS (30), /* fma. */
1081 COSTS_N_INSNS (3), /* addsub. */
1082 COSTS_N_INSNS (1), /* fpconst. */
1083 COSTS_N_INSNS (1), /* neg. */
1084 COSTS_N_INSNS (3), /* compare. */
1085 COSTS_N_INSNS (3), /* widen. */
1086 COSTS_N_INSNS (3), /* narrow. */
1087 COSTS_N_INSNS (3), /* toint. */
1088 COSTS_N_INSNS (3), /* fromint. */
1089 COSTS_N_INSNS (3) /* roundint. */
1093 COSTS_N_INSNS (24), /* div. */
1094 COSTS_N_INSNS (5), /* mult. */
1095 COSTS_N_INSNS (8), /* mult_addsub. */
1096 COSTS_N_INSNS (30), /* fma. */
1097 COSTS_N_INSNS (3), /* addsub. */
1098 COSTS_N_INSNS (1), /* fpconst. */
1099 COSTS_N_INSNS (1), /* neg. */
1100 COSTS_N_INSNS (3), /* compare. */
1101 COSTS_N_INSNS (3), /* widen. */
1102 COSTS_N_INSNS (3), /* narrow. */
1103 COSTS_N_INSNS (3), /* toint. */
1104 COSTS_N_INSNS (3), /* fromint. */
1105 COSTS_N_INSNS (3) /* roundint. */
1110 COSTS_N_INSNS (1) /* alu. */
1114 const struct cpu_cost_table cortexa8_extra_costs
=
1120 COSTS_N_INSNS (1), /* shift. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 0, /* arith_shift_reg. */
1124 COSTS_N_INSNS (1), /* log_shift. */
1125 0, /* log_shift_reg. */
1127 0, /* extend_arith. */
1133 true /* non_exec_costs_exec. */
1138 COSTS_N_INSNS (1), /* simple. */
1139 COSTS_N_INSNS (1), /* flag_setting. */
1140 COSTS_N_INSNS (1), /* extend. */
1141 COSTS_N_INSNS (1), /* add. */
1142 COSTS_N_INSNS (1), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (2), /* extend. */
1151 COSTS_N_INSNS (2), /* extend_add. */
1157 COSTS_N_INSNS (1), /* load. */
1158 COSTS_N_INSNS (1), /* load_sign_extend. */
1159 COSTS_N_INSNS (1), /* ldrd. */
1160 COSTS_N_INSNS (1), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (1), /* loadf. */
1164 COSTS_N_INSNS (1), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (1), /* store. */
1167 COSTS_N_INSNS (1), /* strd. */
1168 COSTS_N_INSNS (1), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1180 COSTS_N_INSNS (36), /* div. */
1181 COSTS_N_INSNS (11), /* mult. */
1182 COSTS_N_INSNS (20), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (9), /* addsub. */
1185 COSTS_N_INSNS (3), /* fpconst. */
1186 COSTS_N_INSNS (3), /* neg. */
1187 COSTS_N_INSNS (6), /* compare. */
1188 COSTS_N_INSNS (4), /* widen. */
1189 COSTS_N_INSNS (4), /* narrow. */
1190 COSTS_N_INSNS (8), /* toint. */
1191 COSTS_N_INSNS (8), /* fromint. */
1192 COSTS_N_INSNS (8) /* roundint. */
1196 COSTS_N_INSNS (64), /* div. */
1197 COSTS_N_INSNS (16), /* mult. */
1198 COSTS_N_INSNS (25), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (9), /* addsub. */
1201 COSTS_N_INSNS (3), /* fpconst. */
1202 COSTS_N_INSNS (3), /* neg. */
1203 COSTS_N_INSNS (6), /* compare. */
1204 COSTS_N_INSNS (6), /* widen. */
1205 COSTS_N_INSNS (6), /* narrow. */
1206 COSTS_N_INSNS (8), /* toint. */
1207 COSTS_N_INSNS (8), /* fromint. */
1208 COSTS_N_INSNS (8) /* roundint. */
1213 COSTS_N_INSNS (1) /* alu. */
1217 const struct cpu_cost_table cortexa5_extra_costs
=
1223 COSTS_N_INSNS (1), /* shift. */
1224 COSTS_N_INSNS (1), /* shift_reg. */
1225 COSTS_N_INSNS (1), /* arith_shift. */
1226 COSTS_N_INSNS (1), /* arith_shift_reg. */
1227 COSTS_N_INSNS (1), /* log_shift. */
1228 COSTS_N_INSNS (1), /* log_shift_reg. */
1229 COSTS_N_INSNS (1), /* extend. */
1230 COSTS_N_INSNS (1), /* extend_arith. */
1231 COSTS_N_INSNS (1), /* bfi. */
1232 COSTS_N_INSNS (1), /* bfx. */
1233 COSTS_N_INSNS (1), /* clz. */
1234 COSTS_N_INSNS (1), /* rev. */
1236 true /* non_exec_costs_exec. */
1243 COSTS_N_INSNS (1), /* flag_setting. */
1244 COSTS_N_INSNS (1), /* extend. */
1245 COSTS_N_INSNS (1), /* add. */
1246 COSTS_N_INSNS (1), /* extend_add. */
1247 COSTS_N_INSNS (7) /* idiv. */
1251 0, /* simple (N/A). */
1252 0, /* flag_setting (N/A). */
1253 COSTS_N_INSNS (1), /* extend. */
1255 COSTS_N_INSNS (2), /* extend_add. */
1261 COSTS_N_INSNS (1), /* load. */
1262 COSTS_N_INSNS (1), /* load_sign_extend. */
1263 COSTS_N_INSNS (6), /* ldrd. */
1264 COSTS_N_INSNS (1), /* ldm_1st. */
1265 1, /* ldm_regs_per_insn_1st. */
1266 2, /* ldm_regs_per_insn_subsequent. */
1267 COSTS_N_INSNS (2), /* loadf. */
1268 COSTS_N_INSNS (4), /* loadd. */
1269 COSTS_N_INSNS (1), /* load_unaligned. */
1270 COSTS_N_INSNS (1), /* store. */
1271 COSTS_N_INSNS (3), /* strd. */
1272 COSTS_N_INSNS (1), /* stm_1st. */
1273 1, /* stm_regs_per_insn_1st. */
1274 2, /* stm_regs_per_insn_subsequent. */
1275 COSTS_N_INSNS (2), /* storef. */
1276 COSTS_N_INSNS (2), /* stored. */
1277 COSTS_N_INSNS (1), /* store_unaligned. */
1278 COSTS_N_INSNS (1), /* loadv. */
1279 COSTS_N_INSNS (1) /* storev. */
1284 COSTS_N_INSNS (15), /* div. */
1285 COSTS_N_INSNS (3), /* mult. */
1286 COSTS_N_INSNS (7), /* mult_addsub. */
1287 COSTS_N_INSNS (7), /* fma. */
1288 COSTS_N_INSNS (3), /* addsub. */
1289 COSTS_N_INSNS (3), /* fpconst. */
1290 COSTS_N_INSNS (3), /* neg. */
1291 COSTS_N_INSNS (3), /* compare. */
1292 COSTS_N_INSNS (3), /* widen. */
1293 COSTS_N_INSNS (3), /* narrow. */
1294 COSTS_N_INSNS (3), /* toint. */
1295 COSTS_N_INSNS (3), /* fromint. */
1296 COSTS_N_INSNS (3) /* roundint. */
1300 COSTS_N_INSNS (30), /* div. */
1301 COSTS_N_INSNS (6), /* mult. */
1302 COSTS_N_INSNS (10), /* mult_addsub. */
1303 COSTS_N_INSNS (7), /* fma. */
1304 COSTS_N_INSNS (3), /* addsub. */
1305 COSTS_N_INSNS (3), /* fpconst. */
1306 COSTS_N_INSNS (3), /* neg. */
1307 COSTS_N_INSNS (3), /* compare. */
1308 COSTS_N_INSNS (3), /* widen. */
1309 COSTS_N_INSNS (3), /* narrow. */
1310 COSTS_N_INSNS (3), /* toint. */
1311 COSTS_N_INSNS (3), /* fromint. */
1312 COSTS_N_INSNS (3) /* roundint. */
1317 COSTS_N_INSNS (1) /* alu. */
1322 const struct cpu_cost_table cortexa7_extra_costs
=
1328 COSTS_N_INSNS (1), /* shift. */
1329 COSTS_N_INSNS (1), /* shift_reg. */
1330 COSTS_N_INSNS (1), /* arith_shift. */
1331 COSTS_N_INSNS (1), /* arith_shift_reg. */
1332 COSTS_N_INSNS (1), /* log_shift. */
1333 COSTS_N_INSNS (1), /* log_shift_reg. */
1334 COSTS_N_INSNS (1), /* extend. */
1335 COSTS_N_INSNS (1), /* extend_arith. */
1336 COSTS_N_INSNS (1), /* bfi. */
1337 COSTS_N_INSNS (1), /* bfx. */
1338 COSTS_N_INSNS (1), /* clz. */
1339 COSTS_N_INSNS (1), /* rev. */
1341 true /* non_exec_costs_exec. */
1348 COSTS_N_INSNS (1), /* flag_setting. */
1349 COSTS_N_INSNS (1), /* extend. */
1350 COSTS_N_INSNS (1), /* add. */
1351 COSTS_N_INSNS (1), /* extend_add. */
1352 COSTS_N_INSNS (7) /* idiv. */
1356 0, /* simple (N/A). */
1357 0, /* flag_setting (N/A). */
1358 COSTS_N_INSNS (1), /* extend. */
1360 COSTS_N_INSNS (2), /* extend_add. */
1366 COSTS_N_INSNS (1), /* load. */
1367 COSTS_N_INSNS (1), /* load_sign_extend. */
1368 COSTS_N_INSNS (3), /* ldrd. */
1369 COSTS_N_INSNS (1), /* ldm_1st. */
1370 1, /* ldm_regs_per_insn_1st. */
1371 2, /* ldm_regs_per_insn_subsequent. */
1372 COSTS_N_INSNS (2), /* loadf. */
1373 COSTS_N_INSNS (2), /* loadd. */
1374 COSTS_N_INSNS (1), /* load_unaligned. */
1375 COSTS_N_INSNS (1), /* store. */
1376 COSTS_N_INSNS (3), /* strd. */
1377 COSTS_N_INSNS (1), /* stm_1st. */
1378 1, /* stm_regs_per_insn_1st. */
1379 2, /* stm_regs_per_insn_subsequent. */
1380 COSTS_N_INSNS (2), /* storef. */
1381 COSTS_N_INSNS (2), /* stored. */
1382 COSTS_N_INSNS (1), /* store_unaligned. */
1383 COSTS_N_INSNS (1), /* loadv. */
1384 COSTS_N_INSNS (1) /* storev. */
1389 COSTS_N_INSNS (15), /* div. */
1390 COSTS_N_INSNS (3), /* mult. */
1391 COSTS_N_INSNS (7), /* mult_addsub. */
1392 COSTS_N_INSNS (7), /* fma. */
1393 COSTS_N_INSNS (3), /* addsub. */
1394 COSTS_N_INSNS (3), /* fpconst. */
1395 COSTS_N_INSNS (3), /* neg. */
1396 COSTS_N_INSNS (3), /* compare. */
1397 COSTS_N_INSNS (3), /* widen. */
1398 COSTS_N_INSNS (3), /* narrow. */
1399 COSTS_N_INSNS (3), /* toint. */
1400 COSTS_N_INSNS (3), /* fromint. */
1401 COSTS_N_INSNS (3) /* roundint. */
1405 COSTS_N_INSNS (30), /* div. */
1406 COSTS_N_INSNS (6), /* mult. */
1407 COSTS_N_INSNS (10), /* mult_addsub. */
1408 COSTS_N_INSNS (7), /* fma. */
1409 COSTS_N_INSNS (3), /* addsub. */
1410 COSTS_N_INSNS (3), /* fpconst. */
1411 COSTS_N_INSNS (3), /* neg. */
1412 COSTS_N_INSNS (3), /* compare. */
1413 COSTS_N_INSNS (3), /* widen. */
1414 COSTS_N_INSNS (3), /* narrow. */
1415 COSTS_N_INSNS (3), /* toint. */
1416 COSTS_N_INSNS (3), /* fromint. */
1417 COSTS_N_INSNS (3) /* roundint. */
1422 COSTS_N_INSNS (1) /* alu. */
1426 const struct cpu_cost_table cortexa12_extra_costs
=
1433 COSTS_N_INSNS (1), /* shift_reg. */
1434 COSTS_N_INSNS (1), /* arith_shift. */
1435 COSTS_N_INSNS (1), /* arith_shift_reg. */
1436 COSTS_N_INSNS (1), /* log_shift. */
1437 COSTS_N_INSNS (1), /* log_shift_reg. */
1439 COSTS_N_INSNS (1), /* extend_arith. */
1441 COSTS_N_INSNS (1), /* bfx. */
1442 COSTS_N_INSNS (1), /* clz. */
1443 COSTS_N_INSNS (1), /* rev. */
1445 true /* non_exec_costs_exec. */
1450 COSTS_N_INSNS (2), /* simple. */
1451 COSTS_N_INSNS (3), /* flag_setting. */
1452 COSTS_N_INSNS (2), /* extend. */
1453 COSTS_N_INSNS (3), /* add. */
1454 COSTS_N_INSNS (2), /* extend_add. */
1455 COSTS_N_INSNS (18) /* idiv. */
1459 0, /* simple (N/A). */
1460 0, /* flag_setting (N/A). */
1461 COSTS_N_INSNS (3), /* extend. */
1463 COSTS_N_INSNS (3), /* extend_add. */
1469 COSTS_N_INSNS (3), /* load. */
1470 COSTS_N_INSNS (3), /* load_sign_extend. */
1471 COSTS_N_INSNS (3), /* ldrd. */
1472 COSTS_N_INSNS (3), /* ldm_1st. */
1473 1, /* ldm_regs_per_insn_1st. */
1474 2, /* ldm_regs_per_insn_subsequent. */
1475 COSTS_N_INSNS (3), /* loadf. */
1476 COSTS_N_INSNS (3), /* loadd. */
1477 0, /* load_unaligned. */
1481 1, /* stm_regs_per_insn_1st. */
1482 2, /* stm_regs_per_insn_subsequent. */
1483 COSTS_N_INSNS (2), /* storef. */
1484 COSTS_N_INSNS (2), /* stored. */
1485 0, /* store_unaligned. */
1486 COSTS_N_INSNS (1), /* loadv. */
1487 COSTS_N_INSNS (1) /* storev. */
1492 COSTS_N_INSNS (17), /* div. */
1493 COSTS_N_INSNS (4), /* mult. */
1494 COSTS_N_INSNS (8), /* mult_addsub. */
1495 COSTS_N_INSNS (8), /* fma. */
1496 COSTS_N_INSNS (4), /* addsub. */
1497 COSTS_N_INSNS (2), /* fpconst. */
1498 COSTS_N_INSNS (2), /* neg. */
1499 COSTS_N_INSNS (2), /* compare. */
1500 COSTS_N_INSNS (4), /* widen. */
1501 COSTS_N_INSNS (4), /* narrow. */
1502 COSTS_N_INSNS (4), /* toint. */
1503 COSTS_N_INSNS (4), /* fromint. */
1504 COSTS_N_INSNS (4) /* roundint. */
1508 COSTS_N_INSNS (31), /* div. */
1509 COSTS_N_INSNS (4), /* mult. */
1510 COSTS_N_INSNS (8), /* mult_addsub. */
1511 COSTS_N_INSNS (8), /* fma. */
1512 COSTS_N_INSNS (4), /* addsub. */
1513 COSTS_N_INSNS (2), /* fpconst. */
1514 COSTS_N_INSNS (2), /* neg. */
1515 COSTS_N_INSNS (2), /* compare. */
1516 COSTS_N_INSNS (4), /* widen. */
1517 COSTS_N_INSNS (4), /* narrow. */
1518 COSTS_N_INSNS (4), /* toint. */
1519 COSTS_N_INSNS (4), /* fromint. */
1520 COSTS_N_INSNS (4) /* roundint. */
1525 COSTS_N_INSNS (1) /* alu. */
1529 const struct cpu_cost_table cortexa15_extra_costs
=
1537 COSTS_N_INSNS (1), /* arith_shift. */
1538 COSTS_N_INSNS (1), /* arith_shift_reg. */
1539 COSTS_N_INSNS (1), /* log_shift. */
1540 COSTS_N_INSNS (1), /* log_shift_reg. */
1542 COSTS_N_INSNS (1), /* extend_arith. */
1543 COSTS_N_INSNS (1), /* bfi. */
1548 true /* non_exec_costs_exec. */
1553 COSTS_N_INSNS (2), /* simple. */
1554 COSTS_N_INSNS (3), /* flag_setting. */
1555 COSTS_N_INSNS (2), /* extend. */
1556 COSTS_N_INSNS (2), /* add. */
1557 COSTS_N_INSNS (2), /* extend_add. */
1558 COSTS_N_INSNS (18) /* idiv. */
1562 0, /* simple (N/A). */
1563 0, /* flag_setting (N/A). */
1564 COSTS_N_INSNS (3), /* extend. */
1566 COSTS_N_INSNS (3), /* extend_add. */
1572 COSTS_N_INSNS (3), /* load. */
1573 COSTS_N_INSNS (3), /* load_sign_extend. */
1574 COSTS_N_INSNS (3), /* ldrd. */
1575 COSTS_N_INSNS (4), /* ldm_1st. */
1576 1, /* ldm_regs_per_insn_1st. */
1577 2, /* ldm_regs_per_insn_subsequent. */
1578 COSTS_N_INSNS (4), /* loadf. */
1579 COSTS_N_INSNS (4), /* loadd. */
1580 0, /* load_unaligned. */
1583 COSTS_N_INSNS (1), /* stm_1st. */
1584 1, /* stm_regs_per_insn_1st. */
1585 2, /* stm_regs_per_insn_subsequent. */
1588 0, /* store_unaligned. */
1589 COSTS_N_INSNS (1), /* loadv. */
1590 COSTS_N_INSNS (1) /* storev. */
1595 COSTS_N_INSNS (17), /* div. */
1596 COSTS_N_INSNS (4), /* mult. */
1597 COSTS_N_INSNS (8), /* mult_addsub. */
1598 COSTS_N_INSNS (8), /* fma. */
1599 COSTS_N_INSNS (4), /* addsub. */
1600 COSTS_N_INSNS (2), /* fpconst. */
1601 COSTS_N_INSNS (2), /* neg. */
1602 COSTS_N_INSNS (5), /* compare. */
1603 COSTS_N_INSNS (4), /* widen. */
1604 COSTS_N_INSNS (4), /* narrow. */
1605 COSTS_N_INSNS (4), /* toint. */
1606 COSTS_N_INSNS (4), /* fromint. */
1607 COSTS_N_INSNS (4) /* roundint. */
1611 COSTS_N_INSNS (31), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1628 COSTS_N_INSNS (1) /* alu. */
1632 const struct cpu_cost_table v7m_extra_costs
=
1640 0, /* arith_shift. */
1641 COSTS_N_INSNS (1), /* arith_shift_reg. */
1643 COSTS_N_INSNS (1), /* log_shift_reg. */
1645 COSTS_N_INSNS (1), /* extend_arith. */
1650 COSTS_N_INSNS (1), /* non_exec. */
1651 false /* non_exec_costs_exec. */
1656 COSTS_N_INSNS (1), /* simple. */
1657 COSTS_N_INSNS (1), /* flag_setting. */
1658 COSTS_N_INSNS (2), /* extend. */
1659 COSTS_N_INSNS (1), /* add. */
1660 COSTS_N_INSNS (3), /* extend_add. */
1661 COSTS_N_INSNS (8) /* idiv. */
1665 0, /* simple (N/A). */
1666 0, /* flag_setting (N/A). */
1667 COSTS_N_INSNS (2), /* extend. */
1669 COSTS_N_INSNS (3), /* extend_add. */
1675 COSTS_N_INSNS (2), /* load. */
1676 0, /* load_sign_extend. */
1677 COSTS_N_INSNS (3), /* ldrd. */
1678 COSTS_N_INSNS (2), /* ldm_1st. */
1679 1, /* ldm_regs_per_insn_1st. */
1680 1, /* ldm_regs_per_insn_subsequent. */
1681 COSTS_N_INSNS (2), /* loadf. */
1682 COSTS_N_INSNS (3), /* loadd. */
1683 COSTS_N_INSNS (1), /* load_unaligned. */
1684 COSTS_N_INSNS (2), /* store. */
1685 COSTS_N_INSNS (3), /* strd. */
1686 COSTS_N_INSNS (2), /* stm_1st. */
1687 1, /* stm_regs_per_insn_1st. */
1688 1, /* stm_regs_per_insn_subsequent. */
1689 COSTS_N_INSNS (2), /* storef. */
1690 COSTS_N_INSNS (3), /* stored. */
1691 COSTS_N_INSNS (1), /* store_unaligned. */
1692 COSTS_N_INSNS (1), /* loadv. */
1693 COSTS_N_INSNS (1) /* storev. */
1698 COSTS_N_INSNS (7), /* div. */
1699 COSTS_N_INSNS (2), /* mult. */
1700 COSTS_N_INSNS (5), /* mult_addsub. */
1701 COSTS_N_INSNS (3), /* fma. */
1702 COSTS_N_INSNS (1), /* addsub. */
1714 COSTS_N_INSNS (15), /* div. */
1715 COSTS_N_INSNS (5), /* mult. */
1716 COSTS_N_INSNS (7), /* mult_addsub. */
1717 COSTS_N_INSNS (7), /* fma. */
1718 COSTS_N_INSNS (3), /* addsub. */
1731 COSTS_N_INSNS (1) /* alu. */
1735 const struct tune_params arm_slowmul_tune
=
1737 &generic_extra_costs
, /* Insn extra costs. */
1738 NULL
, /* Sched adj cost. */
1739 arm_default_branch_cost
,
1740 &arm_default_vec_cost
,
1741 3, /* Constant limit. */
1742 5, /* Max cond insns. */
1743 8, /* Memset max inline. */
1744 1, /* Issue rate. */
1745 ARM_PREFETCH_NOT_BENEFICIAL
,
1746 tune_params::PREF_CONST_POOL_TRUE
,
1747 tune_params::PREF_LDRD_FALSE
,
1748 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1750 tune_params::DISPARAGE_FLAGS_NEITHER
,
1751 tune_params::PREF_NEON_64_FALSE
,
1752 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1753 tune_params::FUSE_NOTHING
,
1754 tune_params::SCHED_AUTOPREF_OFF
1757 const struct tune_params arm_fastmul_tune
=
1759 &generic_extra_costs
, /* Insn extra costs. */
1760 NULL
, /* Sched adj cost. */
1761 arm_default_branch_cost
,
1762 &arm_default_vec_cost
,
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 8, /* Memset max inline. */
1766 1, /* Issue rate. */
1767 ARM_PREFETCH_NOT_BENEFICIAL
,
1768 tune_params::PREF_CONST_POOL_TRUE
,
1769 tune_params::PREF_LDRD_FALSE
,
1770 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1771 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1772 tune_params::DISPARAGE_FLAGS_NEITHER
,
1773 tune_params::PREF_NEON_64_FALSE
,
1774 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1775 tune_params::FUSE_NOTHING
,
1776 tune_params::SCHED_AUTOPREF_OFF
1779 /* StrongARM has early execution of branches, so a sequence that is worth
1780 skipping is shorter. Set max_insns_skipped to a lower value. */
1782 const struct tune_params arm_strongarm_tune
=
1784 &generic_extra_costs
, /* Insn extra costs. */
1785 NULL
, /* Sched adj cost. */
1786 arm_default_branch_cost
,
1787 &arm_default_vec_cost
,
1788 1, /* Constant limit. */
1789 3, /* Max cond insns. */
1790 8, /* Memset max inline. */
1791 1, /* Issue rate. */
1792 ARM_PREFETCH_NOT_BENEFICIAL
,
1793 tune_params::PREF_CONST_POOL_TRUE
,
1794 tune_params::PREF_LDRD_FALSE
,
1795 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1796 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1797 tune_params::DISPARAGE_FLAGS_NEITHER
,
1798 tune_params::PREF_NEON_64_FALSE
,
1799 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1800 tune_params::FUSE_NOTHING
,
1801 tune_params::SCHED_AUTOPREF_OFF
1804 const struct tune_params arm_xscale_tune
=
1806 &generic_extra_costs
, /* Insn extra costs. */
1807 xscale_sched_adjust_cost
,
1808 arm_default_branch_cost
,
1809 &arm_default_vec_cost
,
1810 2, /* Constant limit. */
1811 3, /* Max cond insns. */
1812 8, /* Memset max inline. */
1813 1, /* Issue rate. */
1814 ARM_PREFETCH_NOT_BENEFICIAL
,
1815 tune_params::PREF_CONST_POOL_TRUE
,
1816 tune_params::PREF_LDRD_FALSE
,
1817 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1819 tune_params::DISPARAGE_FLAGS_NEITHER
,
1820 tune_params::PREF_NEON_64_FALSE
,
1821 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1822 tune_params::FUSE_NOTHING
,
1823 tune_params::SCHED_AUTOPREF_OFF
1826 const struct tune_params arm_9e_tune
=
1828 &generic_extra_costs
, /* Insn extra costs. */
1829 NULL
, /* Sched adj cost. */
1830 arm_default_branch_cost
,
1831 &arm_default_vec_cost
,
1832 1, /* Constant limit. */
1833 5, /* Max cond insns. */
1834 8, /* Memset max inline. */
1835 1, /* Issue rate. */
1836 ARM_PREFETCH_NOT_BENEFICIAL
,
1837 tune_params::PREF_CONST_POOL_TRUE
,
1838 tune_params::PREF_LDRD_FALSE
,
1839 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1840 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1841 tune_params::DISPARAGE_FLAGS_NEITHER
,
1842 tune_params::PREF_NEON_64_FALSE
,
1843 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1844 tune_params::FUSE_NOTHING
,
1845 tune_params::SCHED_AUTOPREF_OFF
1848 const struct tune_params arm_marvell_pj4_tune
=
1850 &generic_extra_costs
, /* Insn extra costs. */
1851 NULL
, /* Sched adj cost. */
1852 arm_default_branch_cost
,
1853 &arm_default_vec_cost
,
1854 1, /* Constant limit. */
1855 5, /* Max cond insns. */
1856 8, /* Memset max inline. */
1857 2, /* Issue rate. */
1858 ARM_PREFETCH_NOT_BENEFICIAL
,
1859 tune_params::PREF_CONST_POOL_TRUE
,
1860 tune_params::PREF_LDRD_FALSE
,
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1863 tune_params::DISPARAGE_FLAGS_NEITHER
,
1864 tune_params::PREF_NEON_64_FALSE
,
1865 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1866 tune_params::FUSE_NOTHING
,
1867 tune_params::SCHED_AUTOPREF_OFF
1870 const struct tune_params arm_v6t2_tune
=
1872 &generic_extra_costs
, /* Insn extra costs. */
1873 NULL
, /* Sched adj cost. */
1874 arm_default_branch_cost
,
1875 &arm_default_vec_cost
,
1876 1, /* Constant limit. */
1877 5, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 1, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL
,
1881 tune_params::PREF_CONST_POOL_FALSE
,
1882 tune_params::PREF_LDRD_FALSE
,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER
,
1886 tune_params::PREF_NEON_64_FALSE
,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1888 tune_params::FUSE_NOTHING
,
1889 tune_params::SCHED_AUTOPREF_OFF
1893 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1894 const struct tune_params arm_cortex_tune
=
1896 &generic_extra_costs
,
1897 NULL
, /* Sched adj cost. */
1898 arm_default_branch_cost
,
1899 &arm_default_vec_cost
,
1900 1, /* Constant limit. */
1901 5, /* Max cond insns. */
1902 8, /* Memset max inline. */
1903 2, /* Issue rate. */
1904 ARM_PREFETCH_NOT_BENEFICIAL
,
1905 tune_params::PREF_CONST_POOL_FALSE
,
1906 tune_params::PREF_LDRD_FALSE
,
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1909 tune_params::DISPARAGE_FLAGS_NEITHER
,
1910 tune_params::PREF_NEON_64_FALSE
,
1911 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1912 tune_params::FUSE_NOTHING
,
1913 tune_params::SCHED_AUTOPREF_OFF
1916 const struct tune_params arm_cortex_a8_tune
=
1918 &cortexa8_extra_costs
,
1919 NULL
, /* Sched adj cost. */
1920 arm_default_branch_cost
,
1921 &arm_default_vec_cost
,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL
,
1927 tune_params::PREF_CONST_POOL_FALSE
,
1928 tune_params::PREF_LDRD_FALSE
,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER
,
1932 tune_params::PREF_NEON_64_FALSE
,
1933 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1934 tune_params::FUSE_NOTHING
,
1935 tune_params::SCHED_AUTOPREF_OFF
1938 const struct tune_params arm_cortex_a7_tune
=
1940 &cortexa7_extra_costs
,
1941 NULL
, /* Sched adj cost. */
1942 arm_default_branch_cost
,
1943 &arm_default_vec_cost
,
1944 1, /* Constant limit. */
1945 5, /* Max cond insns. */
1946 8, /* Memset max inline. */
1947 2, /* Issue rate. */
1948 ARM_PREFETCH_NOT_BENEFICIAL
,
1949 tune_params::PREF_CONST_POOL_FALSE
,
1950 tune_params::PREF_LDRD_FALSE
,
1951 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1953 tune_params::DISPARAGE_FLAGS_NEITHER
,
1954 tune_params::PREF_NEON_64_FALSE
,
1955 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1956 tune_params::FUSE_NOTHING
,
1957 tune_params::SCHED_AUTOPREF_OFF
1960 const struct tune_params arm_cortex_a15_tune
=
1962 &cortexa15_extra_costs
,
1963 NULL
, /* Sched adj cost. */
1964 arm_default_branch_cost
,
1965 &arm_default_vec_cost
,
1966 1, /* Constant limit. */
1967 2, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 3, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL
,
1971 tune_params::PREF_CONST_POOL_FALSE
,
1972 tune_params::PREF_LDRD_TRUE
,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_ALL
,
1976 tune_params::PREF_NEON_64_FALSE
,
1977 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1978 tune_params::FUSE_NOTHING
,
1979 tune_params::SCHED_AUTOPREF_FULL
1982 const struct tune_params arm_cortex_a35_tune
=
1984 &cortexa53_extra_costs
,
1985 NULL
, /* Sched adj cost. */
1986 arm_default_branch_cost
,
1987 &arm_default_vec_cost
,
1988 1, /* Constant limit. */
1989 5, /* Max cond insns. */
1990 8, /* Memset max inline. */
1991 1, /* Issue rate. */
1992 ARM_PREFETCH_NOT_BENEFICIAL
,
1993 tune_params::PREF_CONST_POOL_FALSE
,
1994 tune_params::PREF_LDRD_FALSE
,
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1997 tune_params::DISPARAGE_FLAGS_NEITHER
,
1998 tune_params::PREF_NEON_64_FALSE
,
1999 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2000 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2001 tune_params::SCHED_AUTOPREF_OFF
2004 const struct tune_params arm_cortex_a53_tune
=
2006 &cortexa53_extra_costs
,
2007 NULL
, /* Sched adj cost. */
2008 arm_default_branch_cost
,
2009 &arm_default_vec_cost
,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL
,
2015 tune_params::PREF_CONST_POOL_FALSE
,
2016 tune_params::PREF_LDRD_FALSE
,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER
,
2020 tune_params::PREF_NEON_64_FALSE
,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2022 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2023 tune_params::SCHED_AUTOPREF_OFF
2026 const struct tune_params arm_cortex_a57_tune
=
2028 &cortexa57_extra_costs
,
2029 NULL
, /* Sched adj cost. */
2030 arm_default_branch_cost
,
2031 &arm_default_vec_cost
,
2032 1, /* Constant limit. */
2033 2, /* Max cond insns. */
2034 8, /* Memset max inline. */
2035 3, /* Issue rate. */
2036 ARM_PREFETCH_NOT_BENEFICIAL
,
2037 tune_params::PREF_CONST_POOL_FALSE
,
2038 tune_params::PREF_LDRD_TRUE
,
2039 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2041 tune_params::DISPARAGE_FLAGS_ALL
,
2042 tune_params::PREF_NEON_64_FALSE
,
2043 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2044 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2045 tune_params::SCHED_AUTOPREF_FULL
2048 const struct tune_params arm_exynosm1_tune
=
2050 &exynosm1_extra_costs
,
2051 NULL
, /* Sched adj cost. */
2052 arm_default_branch_cost
,
2053 &arm_default_vec_cost
,
2054 1, /* Constant limit. */
2055 2, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 3, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL
,
2059 tune_params::PREF_CONST_POOL_FALSE
,
2060 tune_params::PREF_LDRD_TRUE
,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_ALL
,
2064 tune_params::PREF_NEON_64_FALSE
,
2065 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2066 tune_params::FUSE_NOTHING
,
2067 tune_params::SCHED_AUTOPREF_OFF
2070 const struct tune_params arm_xgene1_tune
=
2072 &xgene1_extra_costs
,
2073 NULL
, /* Sched adj cost. */
2074 arm_default_branch_cost
,
2075 &arm_default_vec_cost
,
2076 1, /* Constant limit. */
2077 2, /* Max cond insns. */
2078 32, /* Memset max inline. */
2079 4, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL
,
2081 tune_params::PREF_CONST_POOL_FALSE
,
2082 tune_params::PREF_LDRD_TRUE
,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_ALL
,
2086 tune_params::PREF_NEON_64_FALSE
,
2087 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2088 tune_params::FUSE_NOTHING
,
2089 tune_params::SCHED_AUTOPREF_OFF
2092 const struct tune_params arm_qdf24xx_tune
=
2094 &qdf24xx_extra_costs
,
2095 NULL
, /* Scheduler cost adjustment. */
2096 arm_default_branch_cost
,
2097 &arm_default_vec_cost
, /* Vectorizer costs. */
2098 1, /* Constant limit. */
2099 2, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 4, /* Issue rate. */
2102 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2103 tune_params::PREF_CONST_POOL_FALSE
,
2104 tune_params::PREF_LDRD_TRUE
,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_ALL
,
2108 tune_params::PREF_NEON_64_FALSE
,
2109 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2110 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2111 tune_params::SCHED_AUTOPREF_FULL
2114 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2115 less appealing. Set max_insns_skipped to a low value. */
2117 const struct tune_params arm_cortex_a5_tune
=
2119 &cortexa5_extra_costs
,
2120 NULL
, /* Sched adj cost. */
2121 arm_cortex_a5_branch_cost
,
2122 &arm_default_vec_cost
,
2123 1, /* Constant limit. */
2124 1, /* Max cond insns. */
2125 8, /* Memset max inline. */
2126 2, /* Issue rate. */
2127 ARM_PREFETCH_NOT_BENEFICIAL
,
2128 tune_params::PREF_CONST_POOL_FALSE
,
2129 tune_params::PREF_LDRD_FALSE
,
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2132 tune_params::DISPARAGE_FLAGS_NEITHER
,
2133 tune_params::PREF_NEON_64_FALSE
,
2134 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2135 tune_params::FUSE_NOTHING
,
2136 tune_params::SCHED_AUTOPREF_OFF
2139 const struct tune_params arm_cortex_a9_tune
=
2141 &cortexa9_extra_costs
,
2142 cortex_a9_sched_adjust_cost
,
2143 arm_default_branch_cost
,
2144 &arm_default_vec_cost
,
2145 1, /* Constant limit. */
2146 5, /* Max cond insns. */
2147 8, /* Memset max inline. */
2148 2, /* Issue rate. */
2149 ARM_PREFETCH_BENEFICIAL(4,32,32),
2150 tune_params::PREF_CONST_POOL_FALSE
,
2151 tune_params::PREF_LDRD_FALSE
,
2152 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2153 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2154 tune_params::DISPARAGE_FLAGS_NEITHER
,
2155 tune_params::PREF_NEON_64_FALSE
,
2156 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2157 tune_params::FUSE_NOTHING
,
2158 tune_params::SCHED_AUTOPREF_OFF
2161 const struct tune_params arm_cortex_a12_tune
=
2163 &cortexa12_extra_costs
,
2164 NULL
, /* Sched adj cost. */
2165 arm_default_branch_cost
,
2166 &arm_default_vec_cost
, /* Vectorizer costs. */
2167 1, /* Constant limit. */
2168 2, /* Max cond insns. */
2169 8, /* Memset max inline. */
2170 2, /* Issue rate. */
2171 ARM_PREFETCH_NOT_BENEFICIAL
,
2172 tune_params::PREF_CONST_POOL_FALSE
,
2173 tune_params::PREF_LDRD_TRUE
,
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2175 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2176 tune_params::DISPARAGE_FLAGS_ALL
,
2177 tune_params::PREF_NEON_64_FALSE
,
2178 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2179 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2180 tune_params::SCHED_AUTOPREF_OFF
2183 const struct tune_params arm_cortex_a73_tune
=
2185 &cortexa57_extra_costs
,
2186 NULL
, /* Sched adj cost. */
2187 arm_default_branch_cost
,
2188 &arm_default_vec_cost
, /* Vectorizer costs. */
2189 1, /* Constant limit. */
2190 2, /* Max cond insns. */
2191 8, /* Memset max inline. */
2192 2, /* Issue rate. */
2193 ARM_PREFETCH_NOT_BENEFICIAL
,
2194 tune_params::PREF_CONST_POOL_FALSE
,
2195 tune_params::PREF_LDRD_TRUE
,
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2198 tune_params::DISPARAGE_FLAGS_ALL
,
2199 tune_params::PREF_NEON_64_FALSE
,
2200 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2201 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2202 tune_params::SCHED_AUTOPREF_FULL
2205 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2206 cycle to execute each. An LDR from the constant pool also takes two cycles
2207 to execute, but mildly increases pipelining opportunity (consecutive
2208 loads/stores can be pipelined together, saving one cycle), and may also
2209 improve icache utilisation. Hence we prefer the constant pool for such
2212 const struct tune_params arm_v7m_tune
=
2215 NULL
, /* Sched adj cost. */
2216 arm_cortex_m_branch_cost
,
2217 &arm_default_vec_cost
,
2218 1, /* Constant limit. */
2219 2, /* Max cond insns. */
2220 8, /* Memset max inline. */
2221 1, /* Issue rate. */
2222 ARM_PREFETCH_NOT_BENEFICIAL
,
2223 tune_params::PREF_CONST_POOL_TRUE
,
2224 tune_params::PREF_LDRD_FALSE
,
2225 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2226 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2227 tune_params::DISPARAGE_FLAGS_NEITHER
,
2228 tune_params::PREF_NEON_64_FALSE
,
2229 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2230 tune_params::FUSE_NOTHING
,
2231 tune_params::SCHED_AUTOPREF_OFF
2234 /* Cortex-M7 tuning. */
2236 const struct tune_params arm_cortex_m7_tune
=
2239 NULL
, /* Sched adj cost. */
2240 arm_cortex_m7_branch_cost
,
2241 &arm_default_vec_cost
,
2242 0, /* Constant limit. */
2243 1, /* Max cond insns. */
2244 8, /* Memset max inline. */
2245 2, /* Issue rate. */
2246 ARM_PREFETCH_NOT_BENEFICIAL
,
2247 tune_params::PREF_CONST_POOL_TRUE
,
2248 tune_params::PREF_LDRD_FALSE
,
2249 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2251 tune_params::DISPARAGE_FLAGS_NEITHER
,
2252 tune_params::PREF_NEON_64_FALSE
,
2253 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2254 tune_params::FUSE_NOTHING
,
2255 tune_params::SCHED_AUTOPREF_OFF
2258 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2259 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2261 const struct tune_params arm_v6m_tune
=
2263 &generic_extra_costs
, /* Insn extra costs. */
2264 NULL
, /* Sched adj cost. */
2265 arm_default_branch_cost
,
2266 &arm_default_vec_cost
, /* Vectorizer costs. */
2267 1, /* Constant limit. */
2268 5, /* Max cond insns. */
2269 8, /* Memset max inline. */
2270 1, /* Issue rate. */
2271 ARM_PREFETCH_NOT_BENEFICIAL
,
2272 tune_params::PREF_CONST_POOL_FALSE
,
2273 tune_params::PREF_LDRD_FALSE
,
2274 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2275 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2276 tune_params::DISPARAGE_FLAGS_NEITHER
,
2277 tune_params::PREF_NEON_64_FALSE
,
2278 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2279 tune_params::FUSE_NOTHING
,
2280 tune_params::SCHED_AUTOPREF_OFF
2283 const struct tune_params arm_fa726te_tune
=
2285 &generic_extra_costs
, /* Insn extra costs. */
2286 fa726te_sched_adjust_cost
,
2287 arm_default_branch_cost
,
2288 &arm_default_vec_cost
,
2289 1, /* Constant limit. */
2290 5, /* Max cond insns. */
2291 8, /* Memset max inline. */
2292 2, /* Issue rate. */
2293 ARM_PREFETCH_NOT_BENEFICIAL
,
2294 tune_params::PREF_CONST_POOL_TRUE
,
2295 tune_params::PREF_LDRD_FALSE
,
2296 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2297 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2298 tune_params::DISPARAGE_FLAGS_NEITHER
,
2299 tune_params::PREF_NEON_64_FALSE
,
2300 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2301 tune_params::FUSE_NOTHING
,
2302 tune_params::SCHED_AUTOPREF_OFF
2305 /* Auto-generated CPU, FPU and architecture tables. */
2306 #include "arm-cpu-data.h"
2308 /* The name of the preprocessor macro to define for this architecture. PROFILE
2309 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2310 is thus chosen to be big enough to hold the longest architecture name. */
2312 char arm_arch_name
[] = "__ARM_ARCH_PROFILE__";
2314 /* Supported TLS relocations. */
2322 TLS_DESCSEQ
/* GNU scheme */
2325 /* The maximum number of insns to be used when loading a constant. */
2327 arm_constant_limit (bool size_p
)
2329 return size_p
? 1 : current_tune
->constant_limit
;
2332 /* Emit an insn that's a simple single-set. Both the operands must be known
2334 inline static rtx_insn
*
2335 emit_set_insn (rtx x
, rtx y
)
2337 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE.  Uses Kernighan's method:
   each iteration clears the least-significant set bit, so the loop
   runs once per set bit.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2355 /* Return the number of bits set in BMAP. */
2357 bitmap_popcount (const sbitmap bmap
)
2359 unsigned int count
= 0;
2361 sbitmap_iterator sbi
;
2363 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2372 } arm_fixed_mode_set
;
2374 /* A small helper for setting fixed-point library libfuncs. */
2377 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2378 const char *funcname
, const char *modename
,
2383 if (num_suffix
== 0)
2384 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2386 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2388 set_optab_libfunc (optable
, mode
, buffer
);
2392 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2393 machine_mode from
, const char *funcname
,
2394 const char *toname
, const char *fromname
)
2397 const char *maybe_suffix_2
= "";
2399 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2400 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2401 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2402 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2403 maybe_suffix_2
= "2";
2405 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2408 set_conv_libfunc (optable
, to
, from
, buffer
);
2411 /* Set up library functions unique to ARM. */
2414 arm_init_libfuncs (void)
2416 /* For Linux, we have access to kernel support for atomic operations. */
2417 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2418 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2420 /* There are no special library functions unless we are using the
2425 /* The functions below are described in Section 4 of the "Run-Time
2426 ABI for the ARM architecture", Version 1.0. */
2428 /* Double-precision floating-point arithmetic. Table 2. */
2429 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2430 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2431 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2432 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2433 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2435 /* Double-precision comparisons. Table 3. */
2436 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2437 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2438 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2439 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2440 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2441 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2442 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2444 /* Single-precision floating-point arithmetic. Table 4. */
2445 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2446 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2447 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2448 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2449 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2451 /* Single-precision comparisons. Table 5. */
2452 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2453 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2454 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2455 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2456 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2457 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2458 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2460 /* Floating-point to integer conversions. Table 6. */
2461 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2462 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2463 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2464 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2465 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2466 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2467 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2468 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2470 /* Conversions between floating types. Table 7. */
2471 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2472 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2474 /* Integer to floating-point conversions. Table 8. */
2475 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2476 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2477 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2478 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2479 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2480 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2481 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2482 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2484 /* Long long. Table 9. */
2485 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2486 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2487 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2488 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2489 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2490 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2491 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2492 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2494 /* Integer (32/32->32) division. \S 4.3.1. */
2495 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2496 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2498 /* The divmod functions are designed so that they can be used for
2499 plain division, even though they return both the quotient and the
2500 remainder. The quotient is returned in the usual location (i.e.,
2501 r0 for SImode, {r0, r1} for DImode), just as would be expected
2502 for an ordinary division routine. Because the AAPCS calling
2503 conventions specify that all of { r0, r1, r2, r3 } are
2504 callee-saved registers, there is no need to tell the compiler
2505 explicitly that those registers are clobbered by these
2507 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2508 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2510 /* For SImode division the ABI provides div-without-mod routines,
2511 which are faster. */
2512 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2513 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2515 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2516 divmod libcalls instead. */
2517 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2518 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2519 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2520 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2522 /* Half-precision float operations. The compiler handles all operations
2523 with NULL libfuncs by converting the SFmode. */
2524 switch (arm_fp16_format
)
2526 case ARM_FP16_FORMAT_IEEE
:
2527 case ARM_FP16_FORMAT_ALTERNATIVE
:
2530 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2531 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2533 : "__gnu_f2h_alternative"));
2534 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2535 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2537 : "__gnu_h2f_alternative"));
2539 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2540 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2542 : "__gnu_d2h_alternative"));
2545 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2546 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2547 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2548 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2549 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2552 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2553 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2554 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2555 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2556 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2557 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2558 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2565 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2567 const arm_fixed_mode_set fixed_arith_modes
[] =
2588 const arm_fixed_mode_set fixed_conv_modes
[] =
2618 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2620 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2621 "add", fixed_arith_modes
[i
].name
, 3);
2622 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2623 "ssadd", fixed_arith_modes
[i
].name
, 3);
2624 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2625 "usadd", fixed_arith_modes
[i
].name
, 3);
2626 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2627 "sub", fixed_arith_modes
[i
].name
, 3);
2628 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2629 "sssub", fixed_arith_modes
[i
].name
, 3);
2630 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2631 "ussub", fixed_arith_modes
[i
].name
, 3);
2632 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2633 "mul", fixed_arith_modes
[i
].name
, 3);
2634 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2635 "ssmul", fixed_arith_modes
[i
].name
, 3);
2636 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2637 "usmul", fixed_arith_modes
[i
].name
, 3);
2638 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2639 "div", fixed_arith_modes
[i
].name
, 3);
2640 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2641 "udiv", fixed_arith_modes
[i
].name
, 3);
2642 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2643 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2644 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2645 "usdiv", fixed_arith_modes
[i
].name
, 3);
2646 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2647 "neg", fixed_arith_modes
[i
].name
, 2);
2648 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2649 "ssneg", fixed_arith_modes
[i
].name
, 2);
2650 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2651 "usneg", fixed_arith_modes
[i
].name
, 2);
2652 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2653 "ashl", fixed_arith_modes
[i
].name
, 3);
2654 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2655 "ashr", fixed_arith_modes
[i
].name
, 3);
2656 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2657 "lshr", fixed_arith_modes
[i
].name
, 3);
2658 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2659 "ssashl", fixed_arith_modes
[i
].name
, 3);
2660 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2661 "usashl", fixed_arith_modes
[i
].name
, 3);
2662 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2663 "cmp", fixed_arith_modes
[i
].name
, 2);
2666 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2667 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2670 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2671 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2674 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2675 fixed_conv_modes
[j
].mode
, "fract",
2676 fixed_conv_modes
[i
].name
,
2677 fixed_conv_modes
[j
].name
);
2678 arm_set_fixed_conv_libfunc (satfract_optab
,
2679 fixed_conv_modes
[i
].mode
,
2680 fixed_conv_modes
[j
].mode
, "satfract",
2681 fixed_conv_modes
[i
].name
,
2682 fixed_conv_modes
[j
].name
);
2683 arm_set_fixed_conv_libfunc (fractuns_optab
,
2684 fixed_conv_modes
[i
].mode
,
2685 fixed_conv_modes
[j
].mode
, "fractuns",
2686 fixed_conv_modes
[i
].name
,
2687 fixed_conv_modes
[j
].name
);
2688 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2689 fixed_conv_modes
[i
].mode
,
2690 fixed_conv_modes
[j
].mode
, "satfractuns",
2691 fixed_conv_modes
[i
].name
,
2692 fixed_conv_modes
[j
].name
);
2696 if (TARGET_AAPCS_BASED
)
2697 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2700 /* On AAPCS systems, this is the "struct __va_list". */
2701 static GTY(()) tree va_list_type
;
2703 /* Return the type to use as __builtin_va_list. */
2705 arm_build_builtin_va_list (void)
2710 if (!TARGET_AAPCS_BASED
)
2711 return std_build_builtin_va_list ();
2713 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2721 The C Library ABI further reinforces this definition in \S
2724 We must follow this definition exactly. The structure tag
2725 name is visible in C++ mangled names, and thus forms a part
2726 of the ABI. The field name may be used by people who
2727 #include <stdarg.h>. */
2728 /* Create the type. */
2729 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2730 /* Give it the required name. */
2731 va_list_name
= build_decl (BUILTINS_LOCATION
,
2733 get_identifier ("__va_list"),
2735 DECL_ARTIFICIAL (va_list_name
) = 1;
2736 TYPE_NAME (va_list_type
) = va_list_name
;
2737 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2738 /* Create the __ap field. */
2739 ap_field
= build_decl (BUILTINS_LOCATION
,
2741 get_identifier ("__ap"),
2743 DECL_ARTIFICIAL (ap_field
) = 1;
2744 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2745 TYPE_FIELDS (va_list_type
) = ap_field
;
2746 /* Compute its layout. */
2747 layout_type (va_list_type
);
2749 return va_list_type
;
2752 /* Return an expression of type "void *" pointing to the next
2753 available argument in a variable-argument list. VALIST is the
2754 user-level va_list object, of type __builtin_va_list. */
2756 arm_extract_valist_ptr (tree valist
)
2758 if (TREE_TYPE (valist
) == error_mark_node
)
2759 return error_mark_node
;
2761 /* On an AAPCS target, the pointer is stored within "struct
2763 if (TARGET_AAPCS_BASED
)
2765 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2766 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2767 valist
, ap_field
, NULL_TREE
);
2773 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2775 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2777 valist
= arm_extract_valist_ptr (valist
);
2778 std_expand_builtin_va_start (valist
, nextarg
);
2781 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2783 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2786 valist
= arm_extract_valist_ptr (valist
);
2787 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2790 /* Check any incompatible options that the user has specified. */
2792 arm_option_check_internal (struct gcc_options
*opts
)
2794 int flags
= opts
->x_target_flags
;
2796 /* iWMMXt and NEON are incompatible. */
2798 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_neon
))
2799 error ("iWMMXt and NEON are incompatible");
2801 /* Make sure that the processor choice does not conflict with any of the
2802 other command line choices. */
2803 if (TARGET_ARM_P (flags
)
2804 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
))
2805 error ("target CPU does not support ARM mode");
2807 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2808 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2809 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2811 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2812 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2814 /* If this target is normally configured to use APCS frames, warn if they
2815 are turned off and debugging is turned on. */
2816 if (TARGET_ARM_P (flags
)
2817 && write_symbols
!= NO_DEBUG
2818 && !TARGET_APCS_FRAME
2819 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2820 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2822 /* iWMMXt unsupported under Thumb mode. */
2823 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2824 error ("iWMMXt unsupported under Thumb mode");
2826 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2827 error ("can not use -mtp=cp15 with 16-bit Thumb");
2829 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2831 error ("RTP PIC is incompatible with Thumb");
2835 /* We only support -mslow-flash-data on armv7-m targets. */
2836 if (target_slow_flash_data
2837 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2838 || (TARGET_THUMB1_P (flags
) || flag_pic
|| TARGET_NEON
)))
2839 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2841 /* We only support pure-code on Thumb-2 M-profile targets. */
2842 if (target_pure_code
2843 && (!arm_arch_thumb2
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
))
2844 error ("-mpure-code only supports non-pic code on armv7-m targets");
2848 /* Recompute the global settings depending on target attribute options. */
2851 arm_option_params_internal (void)
2853 /* If we are not using the default (ARM mode) section anchor offset
2854 ranges, then set the correct ranges now. */
2857 /* Thumb-1 LDR instructions cannot have negative offsets.
2858 Permissible positive offset ranges are 5-bit (for byte loads),
2859 6-bit (for halfword loads), or 7-bit (for word loads).
2860 Empirical results suggest a 7-bit anchor range gives the best
2861 overall code size. */
2862 targetm
.min_anchor_offset
= 0;
2863 targetm
.max_anchor_offset
= 127;
2865 else if (TARGET_THUMB2
)
2867 /* The minimum is set such that the total size of the block
2868 for a particular anchor is 248 + 1 + 4095 bytes, which is
2869 divisible by eight, ensuring natural spacing of anchors. */
2870 targetm
.min_anchor_offset
= -248;
2871 targetm
.max_anchor_offset
= 4095;
2875 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2876 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2881 /* If optimizing for size, bump the number of instructions that we
2882 are prepared to conditionally execute (even on a StrongARM). */
2883 max_insns_skipped
= 6;
2885 /* For THUMB2, we limit the conditional sequence to one IT block. */
2887 max_insns_skipped
= arm_restrict_it
? 1 : 4;
2890 /* When -mrestrict-it is in use tone down the if-conversion. */
2891 max_insns_skipped
= (TARGET_THUMB2
&& arm_restrict_it
)
2892 ? 1 : current_tune
->max_insns_skipped
;
2895 /* True if -mflip-thumb should next add an attribute for the default
2896 mode, false if it should next add an attribute for the opposite mode. */
2897 static GTY(()) bool thumb_flipper
;
2899 /* Options after initial target override. */
2900 static GTY(()) tree init_optimize
;
2903 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2905 if (opts
->x_align_functions
<= 0)
2906 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2907 && opts
->x_optimize_size
? 2 : 4;
2910 /* Implement targetm.override_options_after_change. */
2913 arm_override_options_after_change (void)
2915 arm_configure_build_target (&arm_active_target
,
2916 TREE_TARGET_OPTION (target_option_default_node
),
2917 &global_options_set
, false);
2919 arm_override_options_after_change_1 (&global_options
);
2923 arm_option_restore (struct gcc_options
*, struct cl_target_option
*ptr
)
2925 arm_configure_build_target (&arm_active_target
, ptr
, &global_options_set
,
2929 /* Reset options between modes that the user has specified. */
2931 arm_option_override_internal (struct gcc_options
*opts
,
2932 struct gcc_options
*opts_set
)
2934 arm_override_options_after_change_1 (opts
);
2936 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2938 /* The default is to enable interworking, so this warning message would
2939 be confusing to users who have just compiled with, eg, -march=armv3. */
2940 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2941 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2944 if (TARGET_THUMB_P (opts
->x_target_flags
)
2945 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2947 warning (0, "target CPU does not support THUMB instructions");
2948 opts
->x_target_flags
&= ~MASK_THUMB
;
2951 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2953 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2954 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2957 /* Callee super interworking implies thumb interworking. Adding
2958 this to the flags here simplifies the logic elsewhere. */
2959 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2960 opts
->x_target_flags
|= MASK_INTERWORK
;
2962 /* need to remember initial values so combinaisons of options like
2963 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2964 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2966 if (! opts_set
->x_arm_restrict_it
)
2967 opts
->x_arm_restrict_it
= arm_arch8
;
2969 /* ARM execution state and M profile don't have [restrict] IT. */
2970 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
2971 opts
->x_arm_restrict_it
= 0;
2973 /* Enable -munaligned-access by default for
2974 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2975 i.e. Thumb2 and ARM state only.
2976 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2977 - ARMv8 architecture-base processors.
2979 Disable -munaligned-access by default for
2980 - all pre-ARMv6 architecture-based processors
2981 - ARMv6-M architecture-based processors
2982 - ARMv8-M Baseline processors. */
2984 if (! opts_set
->x_unaligned_access
)
2986 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
2987 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
2989 else if (opts
->x_unaligned_access
== 1
2990 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2992 warning (0, "target CPU does not support unaligned accesses");
2993 opts
->x_unaligned_access
= 0;
2996 /* Don't warn since it's on by default in -O2. */
2997 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2998 opts
->x_flag_schedule_insns
= 0;
3000 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3002 /* Disable shrink-wrap when optimizing function for size, since it tends to
3003 generate additional returns. */
3004 if (optimize_function_for_size_p (cfun
)
3005 && TARGET_THUMB2_P (opts
->x_target_flags
))
3006 opts
->x_flag_shrink_wrap
= false;
3008 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3010 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3011 - epilogue_insns - does not accurately model the corresponding insns
3012 emitted in the asm file. In particular, see the comment in thumb_exit
3013 'Find out how many of the (return) argument registers we can corrupt'.
3014 As a consequence, the epilogue may clobber registers without fipa-ra
3015 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3016 TODO: Accurately model clobbers for epilogue_insns and reenable
3018 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3019 opts
->x_flag_ipa_ra
= 0;
3021 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3023 /* Thumb2 inline assembly code should always use unified syntax.
3024 This will apply to ARM and Thumb1 eventually. */
3025 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3027 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3028 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3032 /* Convert a static initializer array of feature bits to sbitmap
3035 arm_initialize_isa (sbitmap isa
, const enum isa_feature
*isa_bits
)
3038 while (*isa_bits
!= isa_nobit
)
3039 bitmap_set_bit (isa
, *(isa_bits
++));
3042 static sbitmap isa_all_fpubits
;
3043 static sbitmap isa_quirkbits
;
3045 /* Configure a build target TARGET from the user-specified options OPTS and
3046 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3047 architecture have been specified, but the two are not identical. */
3049 arm_configure_build_target (struct arm_build_target
*target
,
3050 struct cl_target_option
*opts
,
3051 struct gcc_options
*opts_set
,
3052 bool warn_compatible
)
3054 const struct processors
*arm_selected_tune
= NULL
;
3055 const struct processors
*arm_selected_arch
= NULL
;
3056 const struct processors
*arm_selected_cpu
= NULL
;
3057 const struct arm_fpu_desc
*arm_selected_fpu
= NULL
;
3059 bitmap_clear (target
->isa
);
3060 target
->core_name
= NULL
;
3061 target
->arch_name
= NULL
;
3063 if (opts_set
->x_arm_arch_option
)
3064 arm_selected_arch
= &all_architectures
[opts
->x_arm_arch_option
];
3066 if (opts_set
->x_arm_cpu_option
)
3068 arm_selected_cpu
= &all_cores
[(int) opts
->x_arm_cpu_option
];
3069 arm_selected_tune
= &all_cores
[(int) opts
->x_arm_cpu_option
];
3072 if (opts_set
->x_arm_tune_option
)
3073 arm_selected_tune
= &all_cores
[(int) opts
->x_arm_tune_option
];
3075 if (arm_selected_arch
)
3077 arm_initialize_isa (target
->isa
, arm_selected_arch
->isa_bits
);
3079 if (arm_selected_cpu
)
3081 auto_sbitmap
cpu_isa (isa_num_bits
);
3083 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->isa_bits
);
3084 bitmap_xor (cpu_isa
, cpu_isa
, target
->isa
);
3085 /* Ignore any bits that are quirk bits. */
3086 bitmap_and_compl (cpu_isa
, cpu_isa
, isa_quirkbits
);
3087 /* Ignore (for now) any bits that might be set by -mfpu. */
3088 bitmap_and_compl (cpu_isa
, cpu_isa
, isa_all_fpubits
);
3090 if (!bitmap_empty_p (cpu_isa
))
3092 if (warn_compatible
)
3093 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3094 arm_selected_cpu
->name
, arm_selected_arch
->name
);
3095 /* -march wins for code generation.
3096 -mcpu wins for default tuning. */
3097 if (!arm_selected_tune
)
3098 arm_selected_tune
= arm_selected_cpu
;
3100 arm_selected_cpu
= arm_selected_arch
;
3104 /* Architecture and CPU are essentially the same.
3105 Prefer the CPU setting. */
3106 arm_selected_arch
= NULL
;
3109 target
->core_name
= arm_selected_cpu
->name
;
3113 /* Pick a CPU based on the architecture. */
3114 arm_selected_cpu
= arm_selected_arch
;
3115 target
->arch_name
= arm_selected_arch
->name
;
3116 /* Note: target->core_name is left unset in this path. */
3119 else if (arm_selected_cpu
)
3121 target
->core_name
= arm_selected_cpu
->name
;
3122 arm_initialize_isa (target
->isa
, arm_selected_cpu
->isa_bits
);
3124 /* If the user did not specify a processor, choose one for them. */
3127 const struct processors
* sel
;
3128 auto_sbitmap
sought_isa (isa_num_bits
);
3129 bitmap_clear (sought_isa
);
3130 auto_sbitmap
default_isa (isa_num_bits
);
3132 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
3133 gcc_assert (arm_selected_cpu
->name
);
3135 /* RWE: All of the selection logic below (to the end of this
3136 'if' clause) looks somewhat suspect. It appears to be mostly
3137 there to support forcing thumb support when the default CPU
3138 does not have thumb (somewhat dubious in terms of what the
3139 user might be expecting). I think it should be removed once
3140 support for the pre-thumb era cores is removed. */
3141 sel
= arm_selected_cpu
;
3142 arm_initialize_isa (default_isa
, sel
->isa_bits
);
3144 /* Now check to see if the user has specified any command line
3145 switches that require certain abilities from the cpu. */
3147 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3149 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3150 bitmap_set_bit (sought_isa
, isa_bit_mode32
);
3152 /* There are no ARM processors that support both APCS-26 and
3153 interworking. Therefore we forcibly remove MODE26 from
3154 from the isa features here (if it was set), so that the
3155 search below will always be able to find a compatible
3157 bitmap_clear_bit (default_isa
, isa_bit_mode26
);
3160 /* If there are such requirements and the default CPU does not
3161 satisfy them, we need to run over the complete list of
3162 cores looking for one that is satisfactory. */
3163 if (!bitmap_empty_p (sought_isa
)
3164 && !bitmap_subset_p (sought_isa
, default_isa
))
3166 auto_sbitmap
candidate_isa (isa_num_bits
);
3167 /* We're only interested in a CPU with at least the
3168 capabilities of the default CPU and the required
3169 additional features. */
3170 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3172 /* Try to locate a CPU type that supports all of the abilities
3173 of the default CPU, plus the extra abilities requested by
3175 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3177 arm_initialize_isa (candidate_isa
, sel
->isa_bits
);
3178 /* An exact match? */
3179 if (bitmap_equal_p (default_isa
, candidate_isa
))
3183 if (sel
->name
== NULL
)
3185 unsigned current_bit_count
= isa_num_bits
;
3186 const struct processors
* best_fit
= NULL
;
3188 /* Ideally we would like to issue an error message here
3189 saying that it was not possible to find a CPU compatible
3190 with the default CPU, but which also supports the command
3191 line options specified by the programmer, and so they
3192 ought to use the -mcpu=<name> command line option to
3193 override the default CPU type.
3195 If we cannot find a CPU that has exactly the
3196 characteristics of the default CPU and the given
3197 command line options we scan the array again looking
3198 for a best match. The best match must have at least
3199 the capabilities of the perfect match. */
3200 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3202 arm_initialize_isa (candidate_isa
, sel
->isa_bits
);
3204 if (bitmap_subset_p (default_isa
, candidate_isa
))
3208 bitmap_and_compl (candidate_isa
, candidate_isa
,
3210 count
= bitmap_popcount (candidate_isa
);
3212 if (count
< current_bit_count
)
3215 current_bit_count
= count
;
3219 gcc_assert (best_fit
);
3223 arm_selected_cpu
= sel
;
3226 /* Now we know the CPU, we can finally initialize the target
3228 target
->core_name
= arm_selected_cpu
->name
;
3229 arm_initialize_isa (target
->isa
, arm_selected_cpu
->isa_bits
);
3232 gcc_assert (arm_selected_cpu
);
3234 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
)
3236 arm_selected_fpu
= &all_fpus
[opts
->x_arm_fpu_index
];
3237 auto_sbitmap
fpu_bits (isa_num_bits
);
3239 arm_initialize_isa (fpu_bits
, arm_selected_fpu
->isa_bits
);
3240 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpubits
);
3241 bitmap_ior (target
->isa
, target
->isa
, fpu_bits
);
3243 else if (target
->core_name
== NULL
)
3244 /* To support this we need to be able to parse FPU feature options
3245 from the architecture string. */
3246 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3248 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3249 if (!arm_selected_tune
)
3250 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
3252 /* Finish initializing the target structure. */
3253 target
->arch_pp_name
= arm_selected_cpu
->arch
;
3254 target
->base_arch
= arm_selected_cpu
->base_arch
;
3255 target
->arch_core
= arm_selected_cpu
->core
;
3257 target
->tune_flags
= arm_selected_tune
->tune_flags
;
3258 target
->tune
= arm_selected_tune
->tune
;
3259 target
->tune_core
= arm_selected_tune
->core
;
3262 /* Fix up any incompatible options that the user has specified. */
3264 arm_option_override (void)
3266 static const enum isa_feature fpu_bitlist
[] = { ISA_ALL_FPU
, isa_nobit
};
3267 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3268 cl_target_option opts
;
3270 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3271 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3273 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3274 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3276 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3278 if (!global_options_set
.x_arm_fpu_index
)
3280 const char *target_fpu_name
;
3284 #ifdef FPUTYPE_DEFAULT
3285 target_fpu_name
= FPUTYPE_DEFAULT
;
3287 target_fpu_name
= "vfp";
3290 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &fpu_index
,
3293 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3296 cl_target_option_save (&opts
, &global_options
);
3297 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3300 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3301 SUBTARGET_OVERRIDE_OPTIONS
;
3304 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3305 arm_base_arch
= arm_active_target
.base_arch
;
3307 arm_tune
= arm_active_target
.tune_core
;
3308 tune_flags
= arm_active_target
.tune_flags
;
3309 current_tune
= arm_active_target
.tune
;
3311 /* TBD: Dwarf info for apcs frame is not handled yet. */
3312 if (TARGET_APCS_FRAME
)
3313 flag_shrink_wrap
= false;
3315 /* BPABI targets use linker tricks to allow interworking on cores
3316 without thumb support. */
3317 if (TARGET_INTERWORK
3319 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3321 warning (0, "target CPU does not support interworking" );
3322 target_flags
&= ~MASK_INTERWORK
;
3325 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3327 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3328 target_flags
|= MASK_APCS_FRAME
;
3331 if (TARGET_POKE_FUNCTION_NAME
)
3332 target_flags
|= MASK_APCS_FRAME
;
3334 if (TARGET_APCS_REENT
&& flag_pic
)
3335 error ("-fpic and -mapcs-reent are incompatible");
3337 if (TARGET_APCS_REENT
)
3338 warning (0, "APCS reentrant code not supported. Ignored");
3340 /* Initialize boolean versions of the architectural flags, for use
3341 in the arm.md file. */
3342 arm_arch3m
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv3m
);
3343 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv4
);
3344 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3345 arm_arch5
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv5
);
3346 arm_arch5e
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv5e
);
3347 arm_arch5te
= arm_arch5e
3348 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3349 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv6
);
3350 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv6k
);
3351 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3352 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3353 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv7
);
3354 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv7em
);
3355 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8
);
3356 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8_1
);
3357 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8_2
);
3358 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3359 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3360 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3361 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3362 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3363 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3364 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3365 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3366 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3367 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3369 = (arm_arch6k
&& arm_arch7
&& arm_arch_thumb_hwdiv
&& arm_arch_arm_hwdiv
);
3372 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3373 error ("selected fp16 options are incompatible");
3374 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3378 /* Set up some tuning parameters. */
3379 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3380 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3381 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3382 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3383 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3384 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3386 /* And finally, set up some quirks. */
3387 arm_arch_no_volatile_ce
3388 = bitmap_bit_p (arm_active_target
.isa
, isa_quirk_no_volatile_ce
);
3390 = arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
, isa_quirk_ARMv6kz
);
3392 /* V5 code we generate is completely interworking capable, so we turn off
3393 TARGET_INTERWORK here to avoid many tests later on. */
3395 /* XXX However, we must pass the right pre-processor defines to CPP
3396 or GLD can get confused. This is a hack. */
3397 if (TARGET_INTERWORK
)
3398 arm_cpp_interwork
= 1;
3401 target_flags
&= ~MASK_INTERWORK
;
3403 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3404 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3406 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3407 error ("iwmmxt abi requires an iwmmxt capable cpu");
3409 /* If soft-float is specified then don't use FPU. */
3410 if (TARGET_SOFT_FLOAT
)
3411 arm_fpu_attr
= FPU_NONE
;
3413 arm_fpu_attr
= FPU_VFP
;
3415 if (TARGET_AAPCS_BASED
)
3417 if (TARGET_CALLER_INTERWORKING
)
3418 error ("AAPCS does not support -mcaller-super-interworking");
3420 if (TARGET_CALLEE_INTERWORKING
)
3421 error ("AAPCS does not support -mcallee-super-interworking");
3424 /* __fp16 support currently assumes the core has ldrh. */
3425 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3426 sorry ("__fp16 and no ldrh");
3428 if (TARGET_AAPCS_BASED
)
3430 if (arm_abi
== ARM_ABI_IWMMXT
)
3431 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3432 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
3433 && TARGET_HARD_FLOAT
)
3435 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3436 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_VFPv2
))
3437 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3440 arm_pcs_default
= ARM_PCS_AAPCS
;
3444 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3445 sorry ("-mfloat-abi=hard and VFP");
3447 if (arm_abi
== ARM_ABI_APCS
)
3448 arm_pcs_default
= ARM_PCS_APCS
;
3450 arm_pcs_default
= ARM_PCS_ATPCS
;
3453 /* For arm2/3 there is no need to do any scheduling if we are doing
3454 software floating-point. */
3455 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3456 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3458 /* Use the cp15 method if it is available. */
3459 if (target_thread_pointer
== TP_AUTO
)
3461 if (arm_arch6k
&& !TARGET_THUMB1
)
3462 target_thread_pointer
= TP_CP15
;
3464 target_thread_pointer
= TP_SOFT
;
3467 /* Override the default structure alignment for AAPCS ABI. */
3468 if (!global_options_set
.x_arm_structure_size_boundary
)
3470 if (TARGET_AAPCS_BASED
)
3471 arm_structure_size_boundary
= 8;
3475 if (arm_structure_size_boundary
!= 8
3476 && arm_structure_size_boundary
!= 32
3477 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3479 if (ARM_DOUBLEWORD_ALIGN
)
3481 "structure size boundary can only be set to 8, 32 or 64");
3483 warning (0, "structure size boundary can only be set to 8 or 32");
3484 arm_structure_size_boundary
3485 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3489 if (TARGET_VXWORKS_RTP
)
3491 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3492 arm_pic_data_is_text_relative
= 0;
3495 && !arm_pic_data_is_text_relative
3496 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3497 /* When text & data segments don't have a fixed displacement, the
3498 intended use is with a single, read only, pic base register.
3499 Unless the user explicitly requested not to do that, set
3501 target_flags
|= MASK_SINGLE_PIC_BASE
;
3503 /* If stack checking is disabled, we can use r10 as the PIC register,
3504 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3505 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3507 if (TARGET_VXWORKS_RTP
)
3508 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3509 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3512 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3513 arm_pic_register
= 9;
3515 if (arm_pic_register_string
!= NULL
)
3517 int pic_register
= decode_reg_name (arm_pic_register_string
);
3520 warning (0, "-mpic-register= is useless without -fpic");
3522 /* Prevent the user from choosing an obviously stupid PIC register. */
3523 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3524 || pic_register
== HARD_FRAME_POINTER_REGNUM
3525 || pic_register
== STACK_POINTER_REGNUM
3526 || pic_register
>= PC_REGNUM
3527 || (TARGET_VXWORKS_RTP
3528 && (unsigned int) pic_register
!= arm_pic_register
))
3529 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3531 arm_pic_register
= pic_register
;
3534 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3535 if (fix_cm3_ldrd
== 2)
3537 if (bitmap_bit_p (arm_active_target
.isa
, isa_quirk_cm3_ldrd
))
3543 /* Hot/Cold partitioning is not currently supported, since we can't
3544 handle literal pool placement in that case. */
3545 if (flag_reorder_blocks_and_partition
)
3547 inform (input_location
,
3548 "-freorder-blocks-and-partition not supported on this architecture");
3549 flag_reorder_blocks_and_partition
= 0;
3550 flag_reorder_blocks
= 1;
3554 /* Hoisting PIC address calculations more aggressively provides a small,
3555 but measurable, size reduction for PIC code. Therefore, we decrease
3556 the bar for unrestricted expression hoisting to the cost of PIC address
3557 calculation, which is 2 instructions. */
3558 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3559 global_options
.x_param_values
,
3560 global_options_set
.x_param_values
);
3562 /* ARM EABI defaults to strict volatile bitfields. */
3563 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3564 && abi_version_at_least(2))
3565 flag_strict_volatile_bitfields
= 1;
3567 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3568 have deemed it beneficial (signified by setting
3569 prefetch.num_slots to 1 or more). */
3570 if (flag_prefetch_loop_arrays
< 0
3573 && current_tune
->prefetch
.num_slots
> 0)
3574 flag_prefetch_loop_arrays
= 1;
3576 /* Set up parameters to be used in prefetching algorithm. Do not
3577 override the defaults unless we are tuning for a core we have
3578 researched values for. */
3579 if (current_tune
->prefetch
.num_slots
> 0)
3580 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3581 current_tune
->prefetch
.num_slots
,
3582 global_options
.x_param_values
,
3583 global_options_set
.x_param_values
);
3584 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3585 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3586 current_tune
->prefetch
.l1_cache_line_size
,
3587 global_options
.x_param_values
,
3588 global_options_set
.x_param_values
);
3589 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3590 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3591 current_tune
->prefetch
.l1_cache_size
,
3592 global_options
.x_param_values
,
3593 global_options_set
.x_param_values
);
3595 /* Use Neon to perform 64-bits operations rather than core
3597 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3598 if (use_neon_for_64bits
== 1)
3599 prefer_neon_for_64bits
= true;
3601 /* Use the alternative scheduling-pressure algorithm by default. */
3602 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3603 global_options
.x_param_values
,
3604 global_options_set
.x_param_values
);
3606 /* Look through ready list and all of queue for instructions
3607 relevant for L2 auto-prefetcher. */
3608 int param_sched_autopref_queue_depth
;
3610 switch (current_tune
->sched_autopref
)
3612 case tune_params::SCHED_AUTOPREF_OFF
:
3613 param_sched_autopref_queue_depth
= -1;
3616 case tune_params::SCHED_AUTOPREF_RANK
:
3617 param_sched_autopref_queue_depth
= 0;
3620 case tune_params::SCHED_AUTOPREF_FULL
:
3621 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3628 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3629 param_sched_autopref_queue_depth
,
3630 global_options
.x_param_values
,
3631 global_options_set
.x_param_values
);
3633 /* Currently, for slow flash data, we just disable literal pools. We also
3634 disable it for pure-code. */
3635 if (target_slow_flash_data
|| target_pure_code
)
3636 arm_disable_literal_pool
= true;
3638 if (use_cmse
&& !arm_arch_cmse
)
3639 error ("target CPU does not support ARMv8-M Security Extensions");
3641 /* Disable scheduling fusion by default if it's not armv7 processor
3642 or doesn't prefer ldrd/strd. */
3643 if (flag_schedule_fusion
== 2
3644 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3645 flag_schedule_fusion
= 0;
3647 /* Need to remember initial options before they are overriden. */
3648 init_optimize
= build_optimization_node (&global_options
);
3650 arm_option_override_internal (&global_options
, &global_options_set
);
3651 arm_option_check_internal (&global_options
);
3652 arm_option_params_internal ();
3654 /* Create the default target_options structure. */
3655 target_option_default_node
= target_option_current_node
3656 = build_target_option_node (&global_options
);
3658 /* Register global variables with the garbage collector. */
3659 arm_add_gc_roots ();
3661 /* Init initial mode for testing. */
3662 thumb_flipper
= TARGET_THUMB
;
3666 arm_add_gc_roots (void)
3668 gcc_obstack_init(&minipool_obstack
);
3669 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3672 /* A table of known ARM exception types.
3673 For use with the interrupt function attribute. */
3677 const char *const arg
;
3678 const unsigned long return_value
;
3682 static const isr_attribute_arg isr_attribute_args
[] =
3684 { "IRQ", ARM_FT_ISR
},
3685 { "irq", ARM_FT_ISR
},
3686 { "FIQ", ARM_FT_FIQ
},
3687 { "fiq", ARM_FT_FIQ
},
3688 { "ABORT", ARM_FT_ISR
},
3689 { "abort", ARM_FT_ISR
},
3690 { "ABORT", ARM_FT_ISR
},
3691 { "abort", ARM_FT_ISR
},
3692 { "UNDEF", ARM_FT_EXCEPTION
},
3693 { "undef", ARM_FT_EXCEPTION
},
3694 { "SWI", ARM_FT_EXCEPTION
},
3695 { "swi", ARM_FT_EXCEPTION
},
3696 { NULL
, ARM_FT_NORMAL
}
3699 /* Returns the (interrupt) function type of the current
3700 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3702 static unsigned long
3703 arm_isr_value (tree argument
)
3705 const isr_attribute_arg
* ptr
;
3709 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3711 /* No argument - default to IRQ. */
3712 if (argument
== NULL_TREE
)
3715 /* Get the value of the argument. */
3716 if (TREE_VALUE (argument
) == NULL_TREE
3717 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3718 return ARM_FT_UNKNOWN
;
3720 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3722 /* Check it against the list of known arguments. */
3723 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3724 if (streq (arg
, ptr
->arg
))
3725 return ptr
->return_value
;
3727 /* An unrecognized interrupt type. */
3728 return ARM_FT_UNKNOWN
;
3731 /* Computes the type of the current function. */
3733 static unsigned long
3734 arm_compute_func_type (void)
3736 unsigned long type
= ARM_FT_UNKNOWN
;
3740 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3742 /* Decide if the current function is volatile. Such functions
3743 never return, and many memory cycles can be saved by not storing
3744 register values that will never be needed again. This optimization
3745 was added to speed up context switching in a kernel application. */
3747 && (TREE_NOTHROW (current_function_decl
)
3748 || !(flag_unwind_tables
3750 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3751 && TREE_THIS_VOLATILE (current_function_decl
))
3752 type
|= ARM_FT_VOLATILE
;
3754 if (cfun
->static_chain_decl
!= NULL
)
3755 type
|= ARM_FT_NESTED
;
3757 attr
= DECL_ATTRIBUTES (current_function_decl
);
3759 a
= lookup_attribute ("naked", attr
);
3761 type
|= ARM_FT_NAKED
;
3763 a
= lookup_attribute ("isr", attr
);
3765 a
= lookup_attribute ("interrupt", attr
);
3768 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3770 type
|= arm_isr_value (TREE_VALUE (a
));
3772 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
3773 type
|= ARM_FT_CMSE_ENTRY
;
3778 /* Returns the type of the current function. */
3781 arm_current_func_type (void)
3783 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3784 cfun
->machine
->func_type
= arm_compute_func_type ();
3786 return cfun
->machine
->func_type
;
3790 arm_allocate_stack_slots_for_args (void)
3792 /* Naked functions should not allocate stack slots for arguments. */
3793 return !IS_NAKED (arm_current_func_type ());
3797 arm_warn_func_return (tree decl
)
3799 /* Naked functions are implemented entirely in assembly, including the
3800 return sequence, so suppress warnings about this. */
3801 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3805 /* Output assembler code for a block containing the constant parts
3806 of a trampoline, leaving space for the variable parts.
3808 On the ARM, (if r8 is the static chain regnum, and remembering that
3809 referencing pc adds an offset of 8) the trampoline looks like:
3812 .word static chain value
3813 .word function's address
3814 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3817 arm_asm_trampoline_template (FILE *f
)
3819 fprintf (f
, "\t.syntax unified\n");
3823 fprintf (f
, "\t.arm\n");
3824 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3825 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3827 else if (TARGET_THUMB2
)
3829 fprintf (f
, "\t.thumb\n");
3830 /* The Thumb-2 trampoline is similar to the arm implementation.
3831 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3832 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3833 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3834 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3838 ASM_OUTPUT_ALIGN (f
, 2);
3839 fprintf (f
, "\t.code\t16\n");
3840 fprintf (f
, ".Ltrampoline_start:\n");
3841 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3842 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3843 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3844 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3845 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3846 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3848 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3849 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3852 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3855 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3857 rtx fnaddr
, mem
, a_tramp
;
3859 emit_block_move (m_tramp
, assemble_trampoline_template (),
3860 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3862 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3863 emit_move_insn (mem
, chain_value
);
3865 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3866 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3867 emit_move_insn (mem
, fnaddr
);
3869 a_tramp
= XEXP (m_tramp
, 0);
3870 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3871 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3872 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3875 /* Thumb trampolines should be entered in thumb mode, so set
3876 the bottom bit of the address. */
3879 arm_trampoline_adjust_address (rtx addr
)
3882 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3883 NULL
, 0, OPTAB_LIB_WIDEN
);
3887 /* Return 1 if it is possible to return using a single instruction.
3888 If SIBLING is non-null, this is a test for a return before a sibling
3889 call. SIBLING is the call insn, so we can examine its register usage. */
3892 use_return_insn (int iscond
, rtx sibling
)
3895 unsigned int func_type
;
3896 unsigned long saved_int_regs
;
3897 unsigned HOST_WIDE_INT stack_adjust
;
3898 arm_stack_offsets
*offsets
;
3900 /* Never use a return instruction before reload has run. */
3901 if (!reload_completed
)
3904 func_type
= arm_current_func_type ();
3906 /* Naked, volatile and stack alignment functions need special
3908 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3911 /* So do interrupt functions that use the frame pointer and Thumb
3912 interrupt functions. */
3913 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3916 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3917 && !optimize_function_for_size_p (cfun
))
3920 offsets
= arm_get_frame_offsets ();
3921 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3923 /* As do variadic functions. */
3924 if (crtl
->args
.pretend_args_size
3925 || cfun
->machine
->uses_anonymous_args
3926 /* Or if the function calls __builtin_eh_return () */
3927 || crtl
->calls_eh_return
3928 /* Or if the function calls alloca */
3929 || cfun
->calls_alloca
3930 /* Or if there is a stack adjustment. However, if the stack pointer
3931 is saved on the stack, we can use a pre-incrementing stack load. */
3932 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3933 && stack_adjust
== 4))
3934 /* Or if the static chain register was saved above the frame, under the
3935 assumption that the stack pointer isn't saved on the stack. */
3936 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
3937 && arm_compute_static_chain_stack_bytes() != 0))
3940 saved_int_regs
= offsets
->saved_regs_mask
;
3942 /* Unfortunately, the insn
3944 ldmib sp, {..., sp, ...}
3946 triggers a bug on most SA-110 based devices, such that the stack
3947 pointer won't be correctly restored if the instruction takes a
3948 page fault. We work around this problem by popping r3 along with
3949 the other registers, since that is never slower than executing
3950 another instruction.
3952 We test for !arm_arch5 here, because code for any architecture
3953 less than this could potentially be run on one of the buggy
3955 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3957 /* Validate that r3 is a call-clobbered register (always true in
3958 the default abi) ... */
3959 if (!call_used_regs
[3])
3962 /* ... that it isn't being used for a return value ... */
3963 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3966 /* ... or for a tail-call argument ... */
3969 gcc_assert (CALL_P (sibling
));
3971 if (find_regno_fusage (sibling
, USE
, 3))
3975 /* ... and that there are no call-saved registers in r0-r2
3976 (always true in the default ABI). */
3977 if (saved_int_regs
& 0x7)
3981 /* Can't be done if interworking with Thumb, and any registers have been
3983 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3986 /* On StrongARM, conditional returns are expensive if they aren't
3987 taken and multiple registers have been stacked. */
3988 if (iscond
&& arm_tune_strongarm
)
3990 /* Conditional return when just the LR is stored is a simple
3991 conditional-load instruction, that's not expensive. */
3992 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3996 && arm_pic_register
!= INVALID_REGNUM
3997 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4001 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4002 several instructions if anything needs to be popped. */
4003 if (saved_int_regs
&& IS_CMSE_ENTRY (func_type
))
4006 /* If there are saved registers but the LR isn't saved, then we need
4007 two instructions for the return. */
4008 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4011 /* Can't be done if any of the VFP regs are pushed,
4012 since this also requires an insn. */
4013 if (TARGET_HARD_FLOAT
)
4014 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4015 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
4018 if (TARGET_REALLY_IWMMXT
)
4019 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4020 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
4026 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4027 shrink-wrapping if possible. This is the case if we need to emit a
4028 prologue, which we can test by looking at the offsets. */
4030 use_simple_return_p (void)
4032 arm_stack_offsets
*offsets
;
4034 offsets
= arm_get_frame_offsets ();
4035 return offsets
->outgoing_args
!= 0;
4038 /* Return TRUE if int I is a valid immediate ARM constant. */
4041 const_ok_for_arm (HOST_WIDE_INT i
)
4045 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4046 be all zero, or all one. */
4047 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4048 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4049 != ((~(unsigned HOST_WIDE_INT
) 0)
4050 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
4053 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4055 /* Fast return for 0 and small values. We must do this for zero, since
4056 the code below can't handle that one case. */
4057 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4060 /* Get the number of trailing zeros. */
4061 lowbit
= ffs((int) i
) - 1;
4063 /* Only even shifts are allowed in ARM mode so round down to the
4064 nearest even number. */
4068 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4073 /* Allow rotated constants in ARM mode. */
4075 && ((i
& ~0xc000003f) == 0
4076 || (i
& ~0xf000000f) == 0
4077 || (i
& ~0xfc000003) == 0))
4084 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4087 if (i
== v
|| i
== (v
| (v
<< 8)))
4090 /* Allow repeated pattern 0xXY00XY00. */
4100 /* Return true if I is a valid constant for the operation CODE. */
4102 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
4104 if (const_ok_for_arm (i
))
4110 /* See if we can use movw. */
4111 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4114 /* Otherwise, try mvn. */
4115 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4118 /* See if we can use addw or subw. */
4120 && ((i
& 0xfffff000) == 0
4121 || ((-i
) & 0xfffff000) == 0))
4142 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4144 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4150 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4154 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4161 /* Return true if I is a valid di mode constant for the operation CODE. */
4163 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4165 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4166 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4167 rtx hi
= GEN_INT (hi_val
);
4168 rtx lo
= GEN_INT (lo_val
);
4178 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4179 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4181 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4188 /* Emit a sequence of insns to handle a large constant.
4189 CODE is the code of the operation required, it can be any of SET, PLUS,
4190 IOR, AND, XOR, MINUS;
4191 MODE is the mode in which the operation is being performed;
4192 VAL is the integer to operate on;
4193 SOURCE is the other operand (a register, or a null-pointer for SET);
4194 SUBTARGETS means it is safe to create scratch registers if that will
4195 either produce a simpler sequence, or we will want to cse the values.
4196 Return value is the number of insns emitted. */
4198 /* ??? Tweak this for thumb2. */
4200 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4201 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4205 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4206 cond
= COND_EXEC_TEST (PATTERN (insn
));
4210 if (subtargets
|| code
== SET
4211 || (REG_P (target
) && REG_P (source
)
4212 && REGNO (target
) != REGNO (source
)))
4214 /* After arm_reorg has been called, we can't fix up expensive
4215 constants by pushing them into memory so we must synthesize
4216 them in-line, regardless of the cost. This is only likely to
4217 be more costly on chips that have load delay slots and we are
4218 compiling without running the scheduler (so no splitting
4219 occurred before the final instruction emission).
4221 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4223 if (!cfun
->machine
->after_arm_reorg
4225 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4227 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4232 /* Currently SET is the only monadic value for CODE, all
4233 the rest are diadic. */
4234 if (TARGET_USE_MOVT
)
4235 arm_emit_movpair (target
, GEN_INT (val
));
4237 emit_set_insn (target
, GEN_INT (val
));
4243 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4245 if (TARGET_USE_MOVT
)
4246 arm_emit_movpair (temp
, GEN_INT (val
));
4248 emit_set_insn (temp
, GEN_INT (val
));
4250 /* For MINUS, the value is subtracted from, since we never
4251 have subtraction of a constant. */
4253 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4255 emit_set_insn (target
,
4256 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4262 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4266 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4267 ARM/THUMB2 immediates, and add up to VAL.
4268 Thr function return value gives the number of insns required. */
4270 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4271 struct four_ints
*return_sequence
)
4273 int best_consecutive_zeros
= 0;
4277 struct four_ints tmp_sequence
;
4279 /* If we aren't targeting ARM, the best place to start is always at
4280 the bottom, otherwise look more closely. */
4283 for (i
= 0; i
< 32; i
+= 2)
4285 int consecutive_zeros
= 0;
4287 if (!(val
& (3 << i
)))
4289 while ((i
< 32) && !(val
& (3 << i
)))
4291 consecutive_zeros
+= 2;
4294 if (consecutive_zeros
> best_consecutive_zeros
)
4296 best_consecutive_zeros
= consecutive_zeros
;
4297 best_start
= i
- consecutive_zeros
;
4304 /* So long as it won't require any more insns to do so, it's
4305 desirable to emit a small constant (in bits 0...9) in the last
4306 insn. This way there is more chance that it can be combined with
4307 a later addressing insn to form a pre-indexed load or store
4308 operation. Consider:
4310 *((volatile int *)0xe0000100) = 1;
4311 *((volatile int *)0xe0000110) = 2;
4313 We want this to wind up as:
4317 str rB, [rA, #0x100]
4319 str rB, [rA, #0x110]
4321 rather than having to synthesize both large constants from scratch.
4323 Therefore, we calculate how many insns would be required to emit
4324 the constant starting from `best_start', and also starting from
4325 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4326 yield a shorter sequence, we may as well use zero. */
4327 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4329 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4331 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4332 if (insns2
<= insns1
)
4334 *return_sequence
= tmp_sequence
;
4342 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4344 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4345 struct four_ints
*return_sequence
, int i
)
4347 int remainder
= val
& 0xffffffff;
4350 /* Try and find a way of doing the job in either two or three
4353 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4354 location. We start at position I. This may be the MSB, or
4355 optimial_immediate_sequence may have positioned it at the largest block
4356 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4357 wrapping around to the top of the word when we drop off the bottom.
4358 In the worst case this code should produce no more than four insns.
4360 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4361 constants, shifted to any arbitrary location. We should always start
4366 unsigned int b1
, b2
, b3
, b4
;
4367 unsigned HOST_WIDE_INT result
;
4370 gcc_assert (insns
< 4);
4375 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4376 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4379 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4380 /* We can use addw/subw for the last 12 bits. */
4384 /* Use an 8-bit shifted/rotated immediate. */
4388 result
= remainder
& ((0x0ff << end
)
4389 | ((i
< end
) ? (0xff >> (32 - end
))
4396 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4397 arbitrary shifts. */
4398 i
-= TARGET_ARM
? 2 : 1;
4402 /* Next, see if we can do a better job with a thumb2 replicated
4405 We do it this way around to catch the cases like 0x01F001E0 where
4406 two 8-bit immediates would work, but a replicated constant would
4409 TODO: 16-bit constants that don't clear all the bits, but still win.
4410 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4413 b1
= (remainder
& 0xff000000) >> 24;
4414 b2
= (remainder
& 0x00ff0000) >> 16;
4415 b3
= (remainder
& 0x0000ff00) >> 8;
4416 b4
= remainder
& 0xff;
4420 /* The 8-bit immediate already found clears b1 (and maybe b2),
4421 but must leave b3 and b4 alone. */
4423 /* First try to find a 32-bit replicated constant that clears
4424 almost everything. We can assume that we can't do it in one,
4425 or else we wouldn't be here. */
4426 unsigned int tmp
= b1
& b2
& b3
& b4
;
4427 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4429 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4430 + (tmp
== b3
) + (tmp
== b4
);
4432 && (matching_bytes
>= 3
4433 || (matching_bytes
== 2
4434 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4436 /* At least 3 of the bytes match, and the fourth has at
4437 least as many bits set, or two of the bytes match
4438 and it will only require one more insn to finish. */
4446 /* Second, try to find a 16-bit replicated constant that can
4447 leave three of the bytes clear. If b2 or b4 is already
4448 zero, then we can. If the 8-bit from above would not
4449 clear b2 anyway, then we still win. */
4450 else if (b1
== b3
&& (!b2
|| !b4
4451 || (remainder
& 0x00ff0000 & ~result
)))
4453 result
= remainder
& 0xff00ff00;
4459 /* The 8-bit immediate already found clears b2 (and maybe b3)
4460 and we don't get here unless b1 is alredy clear, but it will
4461 leave b4 unchanged. */
4463 /* If we can clear b2 and b4 at once, then we win, since the
4464 8-bits couldn't possibly reach that far. */
4467 result
= remainder
& 0x00ff00ff;
4473 return_sequence
->i
[insns
++] = result
;
4474 remainder
&= ~result
;
4476 if (code
== SET
|| code
== MINUS
)
4484 /* Emit an instruction with the indicated PATTERN. If COND is
4485 non-NULL, conditionalize the execution of the instruction on COND
4489 emit_constant_insn (rtx cond
, rtx pattern
)
4492 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4493 emit_insn (pattern
);
4496 /* As above, but extra parameter GENERATE which, if clear, suppresses
4500 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4501 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4502 int subtargets
, int generate
)
4506 int final_invert
= 0;
4508 int set_sign_bit_copies
= 0;
4509 int clear_sign_bit_copies
= 0;
4510 int clear_zero_bit_copies
= 0;
4511 int set_zero_bit_copies
= 0;
4512 int insns
= 0, neg_insns
, inv_insns
;
4513 unsigned HOST_WIDE_INT temp1
, temp2
;
4514 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4515 struct four_ints
*immediates
;
4516 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4518 /* Find out which operations are safe for a given CODE. Also do a quick
4519 check for degenerate cases; these can occur when DImode operations
4532 if (remainder
== 0xffffffff)
4535 emit_constant_insn (cond
,
4536 gen_rtx_SET (target
,
4537 GEN_INT (ARM_SIGN_EXTEND (val
))));
4543 if (reload_completed
&& rtx_equal_p (target
, source
))
4547 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4556 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4559 if (remainder
== 0xffffffff)
4561 if (reload_completed
&& rtx_equal_p (target
, source
))
4564 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4573 if (reload_completed
&& rtx_equal_p (target
, source
))
4576 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4580 if (remainder
== 0xffffffff)
4583 emit_constant_insn (cond
,
4584 gen_rtx_SET (target
,
4585 gen_rtx_NOT (mode
, source
)));
4592 /* We treat MINUS as (val - source), since (source - val) is always
4593 passed as (source + (-val)). */
4597 emit_constant_insn (cond
,
4598 gen_rtx_SET (target
,
4599 gen_rtx_NEG (mode
, source
)));
4602 if (const_ok_for_arm (val
))
4605 emit_constant_insn (cond
,
4606 gen_rtx_SET (target
,
4607 gen_rtx_MINUS (mode
, GEN_INT (val
),
4618 /* If we can do it in one insn get out quickly. */
4619 if (const_ok_for_op (val
, code
))
4622 emit_constant_insn (cond
,
4623 gen_rtx_SET (target
,
4625 ? gen_rtx_fmt_ee (code
, mode
, source
,
4631 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4633 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4634 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4638 if (mode
== SImode
&& i
== 16)
4639 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4641 emit_constant_insn (cond
,
4642 gen_zero_extendhisi2
4643 (target
, gen_lowpart (HImode
, source
)));
4645 /* Extz only supports SImode, but we can coerce the operands
4647 emit_constant_insn (cond
,
4648 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4649 gen_lowpart (SImode
, source
),
4650 GEN_INT (i
), const0_rtx
));
4656 /* Calculate a few attributes that may be useful for specific
4658 /* Count number of leading zeros. */
4659 for (i
= 31; i
>= 0; i
--)
4661 if ((remainder
& (1 << i
)) == 0)
4662 clear_sign_bit_copies
++;
4667 /* Count number of leading 1's. */
4668 for (i
= 31; i
>= 0; i
--)
4670 if ((remainder
& (1 << i
)) != 0)
4671 set_sign_bit_copies
++;
4676 /* Count number of trailing zero's. */
4677 for (i
= 0; i
<= 31; i
++)
4679 if ((remainder
& (1 << i
)) == 0)
4680 clear_zero_bit_copies
++;
4685 /* Count number of trailing 1's. */
4686 for (i
= 0; i
<= 31; i
++)
4688 if ((remainder
& (1 << i
)) != 0)
4689 set_zero_bit_copies
++;
4697 /* See if we can do this by sign_extending a constant that is known
4698 to be negative. This is a good, way of doing it, since the shift
4699 may well merge into a subsequent insn. */
4700 if (set_sign_bit_copies
> 1)
4702 if (const_ok_for_arm
4703 (temp1
= ARM_SIGN_EXTEND (remainder
4704 << (set_sign_bit_copies
- 1))))
4708 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4709 emit_constant_insn (cond
,
4710 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4711 emit_constant_insn (cond
,
4712 gen_ashrsi3 (target
, new_src
,
4713 GEN_INT (set_sign_bit_copies
- 1)));
4717 /* For an inverted constant, we will need to set the low bits,
4718 these will be shifted out of harm's way. */
4719 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4720 if (const_ok_for_arm (~temp1
))
4724 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4725 emit_constant_insn (cond
,
4726 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4727 emit_constant_insn (cond
,
4728 gen_ashrsi3 (target
, new_src
,
4729 GEN_INT (set_sign_bit_copies
- 1)));
4735 /* See if we can calculate the value as the difference between two
4736 valid immediates. */
4737 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4739 int topshift
= clear_sign_bit_copies
& ~1;
4741 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4742 & (0xff000000 >> topshift
));
4744 /* If temp1 is zero, then that means the 9 most significant
4745 bits of remainder were 1 and we've caused it to overflow.
4746 When topshift is 0 we don't need to do anything since we
4747 can borrow from 'bit 32'. */
4748 if (temp1
== 0 && topshift
!= 0)
4749 temp1
= 0x80000000 >> (topshift
- 1);
4751 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4753 if (const_ok_for_arm (temp2
))
4757 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4758 emit_constant_insn (cond
,
4759 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4760 emit_constant_insn (cond
,
4761 gen_addsi3 (target
, new_src
,
4769 /* See if we can generate this by setting the bottom (or the top)
4770 16 bits, and then shifting these into the other half of the
4771 word. We only look for the simplest cases, to do more would cost
4772 too much. Be careful, however, not to generate this when the
4773 alternative would take fewer insns. */
4774 if (val
& 0xffff0000)
4776 temp1
= remainder
& 0xffff0000;
4777 temp2
= remainder
& 0x0000ffff;
4779 /* Overlaps outside this range are best done using other methods. */
4780 for (i
= 9; i
< 24; i
++)
4782 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4783 && !const_ok_for_arm (temp2
))
4785 rtx new_src
= (subtargets
4786 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4788 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4789 source
, subtargets
, generate
);
4797 gen_rtx_ASHIFT (mode
, source
,
4804 /* Don't duplicate cases already considered. */
4805 for (i
= 17; i
< 24; i
++)
4807 if (((temp1
| (temp1
>> i
)) == remainder
)
4808 && !const_ok_for_arm (temp1
))
4810 rtx new_src
= (subtargets
4811 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4813 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4814 source
, subtargets
, generate
);
4819 gen_rtx_SET (target
,
4822 gen_rtx_LSHIFTRT (mode
, source
,
4833 /* If we have IOR or XOR, and the constant can be loaded in a
4834 single instruction, and we can find a temporary to put it in,
4835 then this can be done in two instructions instead of 3-4. */
4837 /* TARGET can't be NULL if SUBTARGETS is 0 */
4838 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4840 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4844 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4846 emit_constant_insn (cond
,
4847 gen_rtx_SET (sub
, GEN_INT (val
)));
4848 emit_constant_insn (cond
,
4849 gen_rtx_SET (target
,
4850 gen_rtx_fmt_ee (code
, mode
,
4861 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4862 and the remainder 0s for e.g. 0xfff00000)
4863 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4865 This can be done in 2 instructions by using shifts with mov or mvn.
4870 mvn r0, r0, lsr #12 */
4871 if (set_sign_bit_copies
> 8
4872 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4876 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4877 rtx shift
= GEN_INT (set_sign_bit_copies
);
4883 gen_rtx_ASHIFT (mode
,
4888 gen_rtx_SET (target
,
4890 gen_rtx_LSHIFTRT (mode
, sub
,
4897 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4899 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4901 For eg. r0 = r0 | 0xfff
4906 if (set_zero_bit_copies
> 8
4907 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4911 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4912 rtx shift
= GEN_INT (set_zero_bit_copies
);
4918 gen_rtx_LSHIFTRT (mode
,
4923 gen_rtx_SET (target
,
4925 gen_rtx_ASHIFT (mode
, sub
,
4931 /* This will never be reached for Thumb2 because orn is a valid
4932 instruction. This is for Thumb1 and the ARM 32 bit cases.
4934 x = y | constant (such that ~constant is a valid constant)
4936 x = ~(~y & ~constant).
4938 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4942 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4943 emit_constant_insn (cond
,
4945 gen_rtx_NOT (mode
, source
)));
4948 sub
= gen_reg_rtx (mode
);
4949 emit_constant_insn (cond
,
4951 gen_rtx_AND (mode
, source
,
4953 emit_constant_insn (cond
,
4954 gen_rtx_SET (target
,
4955 gen_rtx_NOT (mode
, sub
)));
4962 /* See if two shifts will do 2 or more insn's worth of work. */
4963 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4965 HOST_WIDE_INT shift_mask
= ((0xffffffff
4966 << (32 - clear_sign_bit_copies
))
4969 if ((remainder
| shift_mask
) != 0xffffffff)
4971 HOST_WIDE_INT new_val
4972 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4976 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4977 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4978 new_src
, source
, subtargets
, 1);
4983 rtx targ
= subtargets
? NULL_RTX
: target
;
4984 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4985 targ
, source
, subtargets
, 0);
4991 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4992 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4994 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4995 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5001 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5003 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5005 if ((remainder
| shift_mask
) != 0xffffffff)
5007 HOST_WIDE_INT new_val
5008 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5011 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5013 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5014 new_src
, source
, subtargets
, 1);
5019 rtx targ
= subtargets
? NULL_RTX
: target
;
5021 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5022 targ
, source
, subtargets
, 0);
5028 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5029 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5031 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5032 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5044 /* Calculate what the instruction sequences would be if we generated it
5045 normally, negated, or inverted. */
5047 /* AND cannot be split into multiple insns, so invert and use BIC. */
5050 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5053 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5058 if (can_invert
|| final_invert
)
5059 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5064 immediates
= &pos_immediates
;
5066 /* Is the negated immediate sequence more efficient? */
5067 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5070 immediates
= &neg_immediates
;
5075 /* Is the inverted immediate sequence more efficient?
5076 We must allow for an extra NOT instruction for XOR operations, although
5077 there is some chance that the final 'mvn' will get optimized later. */
5078 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5081 immediates
= &inv_immediates
;
5089 /* Now output the chosen sequence as instructions. */
5092 for (i
= 0; i
< insns
; i
++)
5094 rtx new_src
, temp1_rtx
;
5096 temp1
= immediates
->i
[i
];
5098 if (code
== SET
|| code
== MINUS
)
5099 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5100 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5101 new_src
= gen_reg_rtx (mode
);
5107 else if (can_negate
)
5110 temp1
= trunc_int_for_mode (temp1
, mode
);
5111 temp1_rtx
= GEN_INT (temp1
);
5115 else if (code
== MINUS
)
5116 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5118 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5120 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5125 can_negate
= can_invert
;
5129 else if (code
== MINUS
)
5137 emit_constant_insn (cond
, gen_rtx_SET (target
,
5138 gen_rtx_NOT (mode
, source
)));
5145 /* Canonicalize a comparison so that we are more likely to recognize it.
5146 This can be done for a few constant compares, where we can make the
5147 immediate value easier to load. */
5150 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5151 bool op0_preserve_value
)
5154 unsigned HOST_WIDE_INT i
, maxval
;
5156 mode
= GET_MODE (*op0
);
5157 if (mode
== VOIDmode
)
5158 mode
= GET_MODE (*op1
);
5160 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5162 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5163 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5164 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5165 for GTU/LEU in Thumb mode. */
5169 if (*code
== GT
|| *code
== LE
5170 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5172 /* Missing comparison. First try to use an available
5174 if (CONST_INT_P (*op1
))
5182 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5184 *op1
= GEN_INT (i
+ 1);
5185 *code
= *code
== GT
? GE
: LT
;
5191 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5192 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5194 *op1
= GEN_INT (i
+ 1);
5195 *code
= *code
== GTU
? GEU
: LTU
;
5204 /* If that did not work, reverse the condition. */
5205 if (!op0_preserve_value
)
5207 std::swap (*op0
, *op1
);
5208 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5214 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5215 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5216 to facilitate possible combining with a cmp into 'ands'. */
5218 && GET_CODE (*op0
) == ZERO_EXTEND
5219 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5220 && GET_MODE (XEXP (*op0
, 0)) == QImode
5221 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5222 && subreg_lowpart_p (XEXP (*op0
, 0))
5223 && *op1
== const0_rtx
)
5224 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5227 /* Comparisons smaller than DImode. Only adjust comparisons against
5228 an out-of-range constant. */
5229 if (!CONST_INT_P (*op1
)
5230 || const_ok_for_arm (INTVAL (*op1
))
5231 || const_ok_for_arm (- INTVAL (*op1
)))
5245 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5247 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5248 *code
= *code
== GT
? GE
: LT
;
5256 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5258 *op1
= GEN_INT (i
- 1);
5259 *code
= *code
== GE
? GT
: LE
;
5266 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5267 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5269 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5270 *code
= *code
== GTU
? GEU
: LTU
;
5278 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5280 *op1
= GEN_INT (i
- 1);
5281 *code
= *code
== GEU
? GTU
: LEU
;
5292 /* Define how to find the value returned by a function. */
5295 arm_function_value(const_tree type
, const_tree func
,
5296 bool outgoing ATTRIBUTE_UNUSED
)
5299 int unsignedp ATTRIBUTE_UNUSED
;
5300 rtx r ATTRIBUTE_UNUSED
;
5302 mode
= TYPE_MODE (type
);
5304 if (TARGET_AAPCS_BASED
)
5305 return aapcs_allocate_return_reg (mode
, type
, func
);
5307 /* Promote integer types. */
5308 if (INTEGRAL_TYPE_P (type
))
5309 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5311 /* Promotes small structs returned in a register to full-word size
5312 for big-endian AAPCS. */
5313 if (arm_return_in_msb (type
))
5315 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5316 if (size
% UNITS_PER_WORD
!= 0)
5318 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5319 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5323 return arm_libcall_value_1 (mode
);
5326 /* libcall hashtable helpers. */
5328 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5330 static inline hashval_t
hash (const rtx_def
*);
5331 static inline bool equal (const rtx_def
*, const rtx_def
*);
5332 static inline void remove (rtx_def
*);
5336 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5338 return rtx_equal_p (p1
, p2
);
5342 libcall_hasher::hash (const rtx_def
*p1
)
5344 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5347 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5350 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5352 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5356 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5358 static bool init_done
= false;
5359 static libcall_table_type
*libcall_htab
= NULL
;
5365 libcall_htab
= new libcall_table_type (31);
5366 add_libcall (libcall_htab
,
5367 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5368 add_libcall (libcall_htab
,
5369 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5370 add_libcall (libcall_htab
,
5371 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5372 add_libcall (libcall_htab
,
5373 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5375 add_libcall (libcall_htab
,
5376 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5377 add_libcall (libcall_htab
,
5378 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5379 add_libcall (libcall_htab
,
5380 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5381 add_libcall (libcall_htab
,
5382 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5384 add_libcall (libcall_htab
,
5385 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5386 add_libcall (libcall_htab
,
5387 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5388 add_libcall (libcall_htab
,
5389 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5390 add_libcall (libcall_htab
,
5391 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5392 add_libcall (libcall_htab
,
5393 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5394 add_libcall (libcall_htab
,
5395 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5396 add_libcall (libcall_htab
,
5397 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5398 add_libcall (libcall_htab
,
5399 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5401 /* Values from double-precision helper functions are returned in core
5402 registers if the selected core only supports single-precision
5403 arithmetic, even if we are using the hard-float ABI. The same is
5404 true for single-precision helpers, but we will never be using the
5405 hard-float ABI on a CPU which doesn't support single-precision
5406 operations in hardware. */
5407 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5408 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5409 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5410 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5411 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5412 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5413 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5414 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5415 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5416 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5417 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5418 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5420 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5422 add_libcall (libcall_htab
,
5423 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5426 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5430 arm_libcall_value_1 (machine_mode mode
)
5432 if (TARGET_AAPCS_BASED
)
5433 return aapcs_libcall_value (mode
);
5434 else if (TARGET_IWMMXT_ABI
5435 && arm_vector_mode_supported_p (mode
))
5436 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5438 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5441 /* Define how to find the value returned by a library function
5442 assuming the value has mode MODE. */
5445 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5447 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5448 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5450 /* The following libcalls return their result in integer registers,
5451 even though they return a floating point value. */
5452 if (arm_libcall_uses_aapcs_base (libcall
))
5453 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5457 return arm_libcall_value_1 (mode
);
5460 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5463 arm_function_value_regno_p (const unsigned int regno
)
5465 if (regno
== ARG_REGISTER (1)
5467 && TARGET_AAPCS_BASED
5468 && TARGET_HARD_FLOAT
5469 && regno
== FIRST_VFP_REGNUM
)
5470 || (TARGET_IWMMXT_ABI
5471 && regno
== FIRST_IWMMXT_REGNUM
))
5477 /* Determine the amount of memory needed to store the possible return
5478 registers of an untyped call. */
5480 arm_apply_result_size (void)
5486 if (TARGET_HARD_FLOAT_ABI
)
5488 if (TARGET_IWMMXT_ABI
)
5495 /* Decide whether TYPE should be returned in memory (true)
5496 or in a register (false). FNTYPE is the type of the function making
5499 arm_return_in_memory (const_tree type
, const_tree fntype
)
5503 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5505 if (TARGET_AAPCS_BASED
)
5507 /* Simple, non-aggregate types (ie not including vectors and
5508 complex) are always returned in a register (or registers).
5509 We don't care about which register here, so we can short-cut
5510 some of the detail. */
5511 if (!AGGREGATE_TYPE_P (type
)
5512 && TREE_CODE (type
) != VECTOR_TYPE
5513 && TREE_CODE (type
) != COMPLEX_TYPE
)
5516 /* Any return value that is no larger than one word can be
5518 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5521 /* Check any available co-processors to see if they accept the
5522 type as a register candidate (VFP, for example, can return
5523 some aggregates in consecutive registers). These aren't
5524 available if the call is variadic. */
5525 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5528 /* Vector values should be returned using ARM registers, not
5529 memory (unless they're over 16 bytes, which will break since
5530 we only have four call-clobbered registers to play with). */
5531 if (TREE_CODE (type
) == VECTOR_TYPE
)
5532 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5534 /* The rest go in memory. */
5538 if (TREE_CODE (type
) == VECTOR_TYPE
)
5539 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5541 if (!AGGREGATE_TYPE_P (type
) &&
5542 (TREE_CODE (type
) != VECTOR_TYPE
))
5543 /* All simple types are returned in registers. */
5546 if (arm_abi
!= ARM_ABI_APCS
)
5548 /* ATPCS and later return aggregate types in memory only if they are
5549 larger than a word (or are variable size). */
5550 return (size
< 0 || size
> UNITS_PER_WORD
);
5553 /* For the arm-wince targets we choose to be compatible with Microsoft's
5554 ARM and Thumb compilers, which always return aggregates in memory. */
5556 /* All structures/unions bigger than one word are returned in memory.
5557 Also catch the case where int_size_in_bytes returns -1. In this case
5558 the aggregate is either huge or of variable size, and in either case
5559 we will want to return it via memory and not in a register. */
5560 if (size
< 0 || size
> UNITS_PER_WORD
)
5563 if (TREE_CODE (type
) == RECORD_TYPE
)
5567 /* For a struct the APCS says that we only return in a register
5568 if the type is 'integer like' and every addressable element
5569 has an offset of zero. For practical purposes this means
5570 that the structure can have at most one non bit-field element
5571 and that this element must be the first one in the structure. */
5573 /* Find the first field, ignoring non FIELD_DECL things which will
5574 have been created by C++. */
5575 for (field
= TYPE_FIELDS (type
);
5576 field
&& TREE_CODE (field
) != FIELD_DECL
;
5577 field
= DECL_CHAIN (field
))
5581 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5583 /* Check that the first field is valid for returning in a register. */
5585 /* ... Floats are not allowed */
5586 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5589 /* ... Aggregates that are not themselves valid for returning in
5590 a register are not allowed. */
5591 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5594 /* Now check the remaining fields, if any. Only bitfields are allowed,
5595 since they are not addressable. */
5596 for (field
= DECL_CHAIN (field
);
5598 field
= DECL_CHAIN (field
))
5600 if (TREE_CODE (field
) != FIELD_DECL
)
5603 if (!DECL_BIT_FIELD_TYPE (field
))
5610 if (TREE_CODE (type
) == UNION_TYPE
)
5614 /* Unions can be returned in registers if every element is
5615 integral, or can be returned in an integer register. */
5616 for (field
= TYPE_FIELDS (type
);
5618 field
= DECL_CHAIN (field
))
5620 if (TREE_CODE (field
) != FIELD_DECL
)
5623 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5626 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5632 #endif /* not ARM_WINCE */
5634 /* Return all other types in memory. */
5638 const struct pcs_attribute_arg
5642 } pcs_attribute_args
[] =
5644 {"aapcs", ARM_PCS_AAPCS
},
5645 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5647 /* We could recognize these, but changes would be needed elsewhere
5648 * to implement them. */
5649 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5650 {"atpcs", ARM_PCS_ATPCS
},
5651 {"apcs", ARM_PCS_APCS
},
5653 {NULL
, ARM_PCS_UNKNOWN
}
5657 arm_pcs_from_attribute (tree attr
)
5659 const struct pcs_attribute_arg
*ptr
;
5662 /* Get the value of the argument. */
5663 if (TREE_VALUE (attr
) == NULL_TREE
5664 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5665 return ARM_PCS_UNKNOWN
;
5667 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5669 /* Check it against the list of known arguments. */
5670 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5671 if (streq (arg
, ptr
->arg
))
5674 /* An unrecognized interrupt type. */
5675 return ARM_PCS_UNKNOWN
;
5678 /* Get the PCS variant to use for this call. TYPE is the function's type
5679 specification, DECL is the specific declartion. DECL may be null if
5680 the call could be indirect or if this is a library call. */
5682 arm_get_pcs_model (const_tree type
, const_tree decl
)
5684 bool user_convention
= false;
5685 enum arm_pcs user_pcs
= arm_pcs_default
;
5690 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5693 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5694 user_convention
= true;
5697 if (TARGET_AAPCS_BASED
)
5699 /* Detect varargs functions. These always use the base rules
5700 (no argument is ever a candidate for a co-processor
5702 bool base_rules
= stdarg_p (type
);
5704 if (user_convention
)
5706 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5707 sorry ("non-AAPCS derived PCS variant");
5708 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5709 error ("variadic functions must use the base AAPCS variant");
5713 return ARM_PCS_AAPCS
;
5714 else if (user_convention
)
5716 else if (decl
&& flag_unit_at_a_time
)
5718 /* Local functions never leak outside this compilation unit,
5719 so we are free to use whatever conventions are
5721 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5722 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5724 return ARM_PCS_AAPCS_LOCAL
;
5727 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5728 sorry ("PCS variant");
5730 /* For everything else we use the target's default. */
5731 return arm_pcs_default
;
5736 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5737 const_tree fntype ATTRIBUTE_UNUSED
,
5738 rtx libcall ATTRIBUTE_UNUSED
,
5739 const_tree fndecl ATTRIBUTE_UNUSED
)
5741 /* Record the unallocated VFP registers. */
5742 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5743 pcum
->aapcs_vfp_reg_alloc
= 0;
5746 /* Walk down the type tree of TYPE counting consecutive base elements.
5747 If *MODEP is VOIDmode, then set it to the first valid floating point
5748 type. If a non-floating point type is found, or if a floating point
5749 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5750 otherwise return the count in the sub-tree. */
5752 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5757 switch (TREE_CODE (type
))
5760 mode
= TYPE_MODE (type
);
5761 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5764 if (*modep
== VOIDmode
)
5773 mode
= TYPE_MODE (TREE_TYPE (type
));
5774 if (mode
!= DFmode
&& mode
!= SFmode
)
5777 if (*modep
== VOIDmode
)
5786 /* Use V2SImode and V4SImode as representatives of all 64-bit
5787 and 128-bit vector types, whether or not those modes are
5788 supported with the present options. */
5789 size
= int_size_in_bytes (type
);
5802 if (*modep
== VOIDmode
)
5805 /* Vector modes are considered to be opaque: two vectors are
5806 equivalent for the purposes of being homogeneous aggregates
5807 if they are the same size. */
5816 tree index
= TYPE_DOMAIN (type
);
5818 /* Can't handle incomplete types nor sizes that are not
5820 if (!COMPLETE_TYPE_P (type
)
5821 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5824 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5827 || !TYPE_MAX_VALUE (index
)
5828 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5829 || !TYPE_MIN_VALUE (index
)
5830 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5834 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5835 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5837 /* There must be no padding. */
5838 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5850 /* Can't handle incomplete types nor sizes that are not
5852 if (!COMPLETE_TYPE_P (type
)
5853 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5856 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5858 if (TREE_CODE (field
) != FIELD_DECL
)
5861 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5867 /* There must be no padding. */
5868 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5875 case QUAL_UNION_TYPE
:
5877 /* These aren't very interesting except in a degenerate case. */
5882 /* Can't handle incomplete types nor sizes that are not
5884 if (!COMPLETE_TYPE_P (type
)
5885 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5888 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5890 if (TREE_CODE (field
) != FIELD_DECL
)
5893 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5896 count
= count
> sub_count
? count
: sub_count
;
5899 /* There must be no padding. */
5900 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5913 /* Return true if PCS_VARIANT should use VFP registers. */
5915 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5917 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5919 static bool seen_thumb1_vfp
= false;
5921 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5923 sorry ("Thumb-1 hard-float VFP ABI");
5924 /* sorry() is not immediately fatal, so only display this once. */
5925 seen_thumb1_vfp
= true;
5931 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5934 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
5935 (TARGET_VFP_DOUBLE
|| !is_double
));
5938 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5939 suitable for passing or returning in VFP registers for the PCS
5940 variant selected. If it is, then *BASE_MODE is updated to contain
5941 a machine mode describing each element of the argument's type and
5942 *COUNT to hold the number of such elements. */
5944 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5945 machine_mode mode
, const_tree type
,
5946 machine_mode
*base_mode
, int *count
)
5948 machine_mode new_mode
= VOIDmode
;
5950 /* If we have the type information, prefer that to working things
5951 out from the mode. */
5954 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5956 if (ag_count
> 0 && ag_count
<= 4)
5961 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5962 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5963 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5968 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5971 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5977 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5980 *base_mode
= new_mode
;
5985 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5986 machine_mode mode
, const_tree type
)
5988 int count ATTRIBUTE_UNUSED
;
5989 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5991 if (!use_vfp_abi (pcs_variant
, false))
5993 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5998 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6001 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6004 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6005 &pcum
->aapcs_vfp_rmode
,
6006 &pcum
->aapcs_vfp_rcount
);
6009 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6010 for the behaviour of this function. */
6013 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6014 const_tree type ATTRIBUTE_UNUSED
)
6017 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6018 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6019 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6022 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6023 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6025 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6027 || (mode
== TImode
&& ! TARGET_NEON
)
6028 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6031 int rcount
= pcum
->aapcs_vfp_rcount
;
6033 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6037 /* Avoid using unsupported vector modes. */
6038 if (rmode
== V2SImode
)
6040 else if (rmode
== V4SImode
)
6047 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6048 for (i
= 0; i
< rcount
; i
++)
6050 rtx tmp
= gen_rtx_REG (rmode
,
6051 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6052 tmp
= gen_rtx_EXPR_LIST
6054 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6055 XVECEXP (par
, 0, i
) = tmp
;
6058 pcum
->aapcs_reg
= par
;
6061 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6067 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6068 comment there for the behaviour of this function. */
6071 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6073 const_tree type ATTRIBUTE_UNUSED
)
6075 if (!use_vfp_abi (pcs_variant
, false))
6079 || (GET_MODE_CLASS (mode
) == MODE_INT
6080 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6084 machine_mode ag_mode
;
6089 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6094 if (ag_mode
== V2SImode
)
6096 else if (ag_mode
== V4SImode
)
6102 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6103 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6104 for (i
= 0; i
< count
; i
++)
6106 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6107 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6108 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6109 XVECEXP (par
, 0, i
) = tmp
;
6115 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6119 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6120 machine_mode mode ATTRIBUTE_UNUSED
,
6121 const_tree type ATTRIBUTE_UNUSED
)
6123 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6124 pcum
->aapcs_vfp_reg_alloc
= 0;
6128 #define AAPCS_CP(X) \
6130 aapcs_ ## X ## _cum_init, \
6131 aapcs_ ## X ## _is_call_candidate, \
6132 aapcs_ ## X ## _allocate, \
6133 aapcs_ ## X ## _is_return_candidate, \
6134 aapcs_ ## X ## _allocate_return_reg, \
6135 aapcs_ ## X ## _advance \
6138 /* Table of co-processors that can be used to pass arguments in
6139 registers. Idealy no arugment should be a candidate for more than
6140 one co-processor table entry, but the table is processed in order
6141 and stops after the first match. If that entry then fails to put
6142 the argument into a co-processor register, the argument will go on
6146 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6147 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6149 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6150 BLKmode) is a candidate for this co-processor's registers; this
6151 function should ignore any position-dependent state in
6152 CUMULATIVE_ARGS and only use call-type dependent information. */
6153 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6155 /* Return true if the argument does get a co-processor register; it
6156 should set aapcs_reg to an RTX of the register allocated as is
6157 required for a return from FUNCTION_ARG. */
6158 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6160 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6161 be returned in this co-processor's registers. */
6162 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6164 /* Allocate and return an RTX element to hold the return type of a call. This
6165 routine must not fail and will only be called if is_return_candidate
6166 returned true with the same parameters. */
6167 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6169 /* Finish processing this argument and prepare to start processing
6171 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6172 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6180 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6185 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6186 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6193 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6195 /* We aren't passed a decl, so we can't check that a call is local.
6196 However, it isn't clear that that would be a win anyway, since it
6197 might limit some tail-calling opportunities. */
6198 enum arm_pcs pcs_variant
;
6202 const_tree fndecl
= NULL_TREE
;
6204 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6207 fntype
= TREE_TYPE (fntype
);
6210 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6213 pcs_variant
= arm_pcs_default
;
6215 if (pcs_variant
!= ARM_PCS_AAPCS
)
6219 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6220 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6229 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6232 /* We aren't passed a decl, so we can't check that a call is local.
6233 However, it isn't clear that that would be a win anyway, since it
6234 might limit some tail-calling opportunities. */
6235 enum arm_pcs pcs_variant
;
6236 int unsignedp ATTRIBUTE_UNUSED
;
6240 const_tree fndecl
= NULL_TREE
;
6242 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6245 fntype
= TREE_TYPE (fntype
);
6248 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6251 pcs_variant
= arm_pcs_default
;
6253 /* Promote integer types. */
6254 if (type
&& INTEGRAL_TYPE_P (type
))
6255 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6257 if (pcs_variant
!= ARM_PCS_AAPCS
)
6261 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6262 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6264 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6268 /* Promotes small structs returned in a register to full-word size
6269 for big-endian AAPCS. */
6270 if (type
&& arm_return_in_msb (type
))
6272 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6273 if (size
% UNITS_PER_WORD
!= 0)
6275 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6276 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
6280 return gen_rtx_REG (mode
, R0_REGNUM
);
6284 aapcs_libcall_value (machine_mode mode
)
6286 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6287 && GET_MODE_SIZE (mode
) <= 4)
6290 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6293 /* Lay out a function argument using the AAPCS rules. The rule
6294 numbers referred to here are those in the AAPCS. */
6296 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6297 const_tree type
, bool named
)
6302 /* We only need to do this once per argument. */
6303 if (pcum
->aapcs_arg_processed
)
6306 pcum
->aapcs_arg_processed
= true;
6308 /* Special case: if named is false then we are handling an incoming
6309 anonymous argument which is on the stack. */
6313 /* Is this a potential co-processor register candidate? */
6314 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6316 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6317 pcum
->aapcs_cprc_slot
= slot
;
6319 /* We don't have to apply any of the rules from part B of the
6320 preparation phase, these are handled elsewhere in the
6325 /* A Co-processor register candidate goes either in its own
6326 class of registers or on the stack. */
6327 if (!pcum
->aapcs_cprc_failed
[slot
])
6329 /* C1.cp - Try to allocate the argument to co-processor
6331 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6334 /* C2.cp - Put the argument on the stack and note that we
6335 can't assign any more candidates in this slot. We also
6336 need to note that we have allocated stack space, so that
6337 we won't later try to split a non-cprc candidate between
6338 core registers and the stack. */
6339 pcum
->aapcs_cprc_failed
[slot
] = true;
6340 pcum
->can_split
= false;
6343 /* We didn't get a register, so this argument goes on the
6345 gcc_assert (pcum
->can_split
== false);
6350 /* C3 - For double-word aligned arguments, round the NCRN up to the
6351 next even number. */
6352 ncrn
= pcum
->aapcs_ncrn
;
6353 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
6356 nregs
= ARM_NUM_REGS2(mode
, type
);
6358 /* Sigh, this test should really assert that nregs > 0, but a GCC
6359 extension allows empty structs and then gives them empty size; it
6360 then allows such a structure to be passed by value. For some of
6361 the code below we have to pretend that such an argument has
6362 non-zero size so that we 'locate' it correctly either in
6363 registers or on the stack. */
6364 gcc_assert (nregs
>= 0);
6366 nregs2
= nregs
? nregs
: 1;
6368 /* C4 - Argument fits entirely in core registers. */
6369 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6371 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6372 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6376 /* C5 - Some core registers left and there are no arguments already
6377 on the stack: split this argument between the remaining core
6378 registers and the stack. */
6379 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6381 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6382 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6383 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6387 /* C6 - NCRN is set to 4. */
6388 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6390 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6394 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6395 for a call to a function whose data type is FNTYPE.
6396 For a library call, FNTYPE is NULL. */
6398 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6400 tree fndecl ATTRIBUTE_UNUSED
)
6402 /* Long call handling. */
6404 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6406 pcum
->pcs_variant
= arm_pcs_default
;
6408 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6410 if (arm_libcall_uses_aapcs_base (libname
))
6411 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6413 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6414 pcum
->aapcs_reg
= NULL_RTX
;
6415 pcum
->aapcs_partial
= 0;
6416 pcum
->aapcs_arg_processed
= false;
6417 pcum
->aapcs_cprc_slot
= -1;
6418 pcum
->can_split
= true;
6420 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6424 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6426 pcum
->aapcs_cprc_failed
[i
] = false;
6427 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6435 /* On the ARM, the offset starts at 0. */
6437 pcum
->iwmmxt_nregs
= 0;
6438 pcum
->can_split
= true;
6440 /* Varargs vectors are treated the same as long long.
6441 named_count avoids having to change the way arm handles 'named' */
6442 pcum
->named_count
= 0;
6445 if (TARGET_REALLY_IWMMXT
&& fntype
)
6449 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6451 fn_arg
= TREE_CHAIN (fn_arg
))
6452 pcum
->named_count
+= 1;
6454 if (! pcum
->named_count
)
6455 pcum
->named_count
= INT_MAX
;
6459 /* Return true if mode/type need doubleword alignment. */
6461 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6464 return PARM_BOUNDARY
< GET_MODE_ALIGNMENT (mode
);
6466 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6467 if (!AGGREGATE_TYPE_P (type
))
6468 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6470 /* Array types: Use member alignment of element type. */
6471 if (TREE_CODE (type
) == ARRAY_TYPE
)
6472 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6474 /* Record/aggregate types: Use greatest member alignment of any member. */
6475 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6476 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6483 /* Determine where to put an argument to a function.
6484 Value is zero to push the argument on the stack,
6485 or a hard register in which to store the argument.
6487 MODE is the argument's machine mode.
6488 TYPE is the data type of the argument (as a tree).
6489 This is null for libcalls where that information may
6491 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6492 the preceding args and about the function being called.
6493 NAMED is nonzero if this argument is a named parameter
6494 (otherwise it is an extra parameter matching an ellipsis).
6496 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6497 other arguments are passed on the stack. If (NAMED == 0) (which happens
6498 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6499 defined), say it is passed in the stack (function_prologue will
6500 indeed make it pass in the stack if necessary). */
6503 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6504 const_tree type
, bool named
)
6506 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6509 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6510 a call insn (op3 of a call_value insn). */
6511 if (mode
== VOIDmode
)
6514 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6516 aapcs_layout_arg (pcum
, mode
, type
, named
);
6517 return pcum
->aapcs_reg
;
6520 /* Varargs vectors are treated the same as long long.
6521 named_count avoids having to change the way arm handles 'named' */
6522 if (TARGET_IWMMXT_ABI
6523 && arm_vector_mode_supported_p (mode
)
6524 && pcum
->named_count
> pcum
->nargs
+ 1)
6526 if (pcum
->iwmmxt_nregs
<= 9)
6527 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6530 pcum
->can_split
= false;
6535 /* Put doubleword aligned quantities in even register pairs. */
6537 && ARM_DOUBLEWORD_ALIGN
6538 && arm_needs_doubleword_align (mode
, type
))
6541 /* Only allow splitting an arg between regs and memory if all preceding
6542 args were allocated to regs. For args passed by reference we only count
6543 the reference pointer. */
6544 if (pcum
->can_split
)
6547 nregs
= ARM_NUM_REGS2 (mode
, type
);
6549 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6552 return gen_rtx_REG (mode
, pcum
->nregs
);
6556 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6558 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
6559 ? DOUBLEWORD_ALIGNMENT
6564 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6565 tree type
, bool named
)
6567 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6568 int nregs
= pcum
->nregs
;
6570 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6572 aapcs_layout_arg (pcum
, mode
, type
, named
);
6573 return pcum
->aapcs_partial
;
6576 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6579 if (NUM_ARG_REGS
> nregs
6580 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6582 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6587 /* Update the data in PCUM to advance over an argument
6588 of mode MODE and data type TYPE.
6589 (TYPE is null for libcalls where that information may not be available.) */
6592 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6593 const_tree type
, bool named
)
6595 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6597 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6599 aapcs_layout_arg (pcum
, mode
, type
, named
);
6601 if (pcum
->aapcs_cprc_slot
>= 0)
6603 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6605 pcum
->aapcs_cprc_slot
= -1;
6608 /* Generic stuff. */
6609 pcum
->aapcs_arg_processed
= false;
6610 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6611 pcum
->aapcs_reg
= NULL_RTX
;
6612 pcum
->aapcs_partial
= 0;
6617 if (arm_vector_mode_supported_p (mode
)
6618 && pcum
->named_count
> pcum
->nargs
6619 && TARGET_IWMMXT_ABI
)
6620 pcum
->iwmmxt_nregs
+= 1;
6622 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6626 /* Variable sized types are passed by reference. This is a GCC
6627 extension to the ARM ABI. */
6630 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6631 machine_mode mode ATTRIBUTE_UNUSED
,
6632 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6634 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;
6648 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6650 arm_pragma_long_calls
= LONG
;
6654 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6656 arm_pragma_long_calls
= SHORT
;
6660 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6662 arm_pragma_long_calls
= OFF
;
6665 /* Handle an attribute requiring a FUNCTION_DECL;
6666 arguments as in struct attribute_spec.handler. */
6668 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6669 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6671 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6673 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6675 *no_add_attrs
= true;
6681 /* Handle an "interrupt" or "isr" attribute;
6682 arguments as in struct attribute_spec.handler. */
6684 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6689 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6691 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6693 *no_add_attrs
= true;
6695 /* FIXME: the argument if any is checked for type attributes;
6696 should it be checked for decl ones? */
6700 if (TREE_CODE (*node
) == FUNCTION_TYPE
6701 || TREE_CODE (*node
) == METHOD_TYPE
)
6703 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6705 warning (OPT_Wattributes
, "%qE attribute ignored",
6707 *no_add_attrs
= true;
6710 else if (TREE_CODE (*node
) == POINTER_TYPE
6711 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6712 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6713 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6715 *node
= build_variant_type_copy (*node
);
6716 TREE_TYPE (*node
) = build_type_attribute_variant
6718 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6719 *no_add_attrs
= true;
6723 /* Possibly pass this attribute on from the type to a decl. */
6724 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6725 | (int) ATTR_FLAG_FUNCTION_NEXT
6726 | (int) ATTR_FLAG_ARRAY_NEXT
))
6728 *no_add_attrs
= true;
6729 return tree_cons (name
, args
, NULL_TREE
);
6733 warning (OPT_Wattributes
, "%qE attribute ignored",
6742 /* Handle a "pcs" attribute; arguments as in struct
6743 attribute_spec.handler. */
6745 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6746 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6748 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6750 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6751 *no_add_attrs
= true;
6756 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6757 /* Handle the "notshared" attribute. This attribute is another way of
6758 requesting hidden visibility. ARM's compiler supports
6759 "__declspec(notshared)"; we support the same thing via an
6763 arm_handle_notshared_attribute (tree
*node
,
6764 tree name ATTRIBUTE_UNUSED
,
6765 tree args ATTRIBUTE_UNUSED
,
6766 int flags ATTRIBUTE_UNUSED
,
6769 tree decl
= TYPE_NAME (*node
);
6773 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6774 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6775 *no_add_attrs
= false;
6781 /* This function returns true if a function with declaration FNDECL and type
6782 FNTYPE uses the stack to pass arguments or return variables and false
6783 otherwise. This is used for functions with the attributes
6784 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6785 diagnostic messages if the stack is used. NAME is the name of the attribute
6789 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
6791 function_args_iterator args_iter
;
6792 CUMULATIVE_ARGS args_so_far_v
;
6793 cumulative_args_t args_so_far
;
6794 bool first_param
= true;
6795 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
6797 /* Error out if any argument is passed on the stack. */
6798 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
6799 args_so_far
= pack_cumulative_args (&args_so_far_v
);
6800 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
6803 machine_mode arg_mode
= TYPE_MODE (arg_type
);
6805 prev_arg_type
= arg_type
;
6806 if (VOID_TYPE_P (arg_type
))
6810 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
, true);
6811 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
, true);
6813 || arm_arg_partial_bytes (args_so_far
, arg_mode
, arg_type
, true))
6815 error ("%qE attribute not available to functions with arguments "
6816 "passed on the stack", name
);
6819 first_param
= false;
6822 /* Error out for variadic functions since we cannot control how many
6823 arguments will be passed and thus stack could be used. stdarg_p () is not
6824 used for the checking to avoid browsing arguments twice. */
6825 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
6827 error ("%qE attribute not available to functions with variable number "
6828 "of arguments", name
);
6832 /* Error out if return value is passed on the stack. */
6833 ret_type
= TREE_TYPE (fntype
);
6834 if (arm_return_in_memory (ret_type
, fntype
))
6836 error ("%qE attribute not available to functions that return value on "
6843 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6844 function will check whether the attribute is allowed here and will add the
6845 attribute to the function declaration tree or otherwise issue a warning. */
6848 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
6857 *no_add_attrs
= true;
6858 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6863 /* Ignore attribute for function types. */
6864 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6866 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6868 *no_add_attrs
= true;
6874 /* Warn for static linkage functions. */
6875 if (!TREE_PUBLIC (fndecl
))
6877 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
6878 "with static linkage", name
);
6879 *no_add_attrs
= true;
6883 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
6884 TREE_TYPE (fndecl
));
6889 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6890 function will check whether the attribute is allowed here and will add the
6891 attribute to the function type tree or otherwise issue a diagnostic. The
6892 reason we check this at declaration time is to only allow the use of the
6893 attribute with declarations of function pointers and not function
6894 declarations. This function checks NODE is of the expected type and issues
6895 diagnostics otherwise using NAME. If it is not of the expected type
6896 *NO_ADD_ATTRS will be set to true. */
6899 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
6904 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
6909 *no_add_attrs
= true;
6910 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6915 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
6918 fntype
= TREE_TYPE (decl
);
6921 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
6922 fntype
= TREE_TYPE (fntype
);
6924 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
6926 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
6927 "function pointer", name
);
6928 *no_add_attrs
= true;
6932 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
6937 /* Prevent trees being shared among function types with and without
6938 cmse_nonsecure_call attribute. */
6939 type
= TREE_TYPE (decl
);
6941 type
= build_distinct_type_copy (type
);
6942 TREE_TYPE (decl
) = type
;
6945 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
6948 fntype
= TREE_TYPE (fntype
);
6949 fntype
= build_distinct_type_copy (fntype
);
6950 TREE_TYPE (type
) = fntype
;
6953 /* Construct a type attribute and add it to the function type. */
6954 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
6955 TYPE_ATTRIBUTES (fntype
));
6956 TYPE_ATTRIBUTES (fntype
) = attrs
;
6960 /* Return 0 if the attributes for two types are incompatible, 1 if they
6961 are compatible, and 2 if they are nearly compatible (which causes a
6962 warning to be generated). */
6964 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6968 /* Check for mismatch of non-default calling convention. */
6969 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6972 /* Check for mismatched call attributes. */
6973 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6974 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6975 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6976 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6978 /* Only bother to check if an attribute is defined. */
6979 if (l1
| l2
| s1
| s2
)
6981 /* If one type has an attribute, the other must have the same attribute. */
6982 if ((l1
!= l2
) || (s1
!= s2
))
6985 /* Disallow mixed attributes. */
6986 if ((l1
& s2
) || (l2
& s1
))
6990 /* Check for mismatched ISR attribute. */
6991 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6993 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6994 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6996 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7000 l1
= lookup_attribute ("cmse_nonsecure_call",
7001 TYPE_ATTRIBUTES (type1
)) != NULL
;
7002 l2
= lookup_attribute ("cmse_nonsecure_call",
7003 TYPE_ATTRIBUTES (type2
)) != NULL
;
7011 /* Assigns default attributes to newly defined type. This is used to
7012 set short_call/long_call attributes for function types of
7013 functions defined inside corresponding #pragma scopes. */
7015 arm_set_default_type_attributes (tree type
)
7017 /* Add __attribute__ ((long_call)) to all functions, when
7018 inside #pragma long_calls or __attribute__ ((short_call)),
7019 when inside #pragma no_long_calls. */
7020 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7022 tree type_attr_list
, attr_name
;
7023 type_attr_list
= TYPE_ATTRIBUTES (type
);
7025 if (arm_pragma_long_calls
== LONG
)
7026 attr_name
= get_identifier ("long_call");
7027 else if (arm_pragma_long_calls
== SHORT
)
7028 attr_name
= get_identifier ("short_call");
7032 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7033 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7037 /* Return true if DECL is known to be linked into section SECTION. */
7040 arm_function_in_section_p (tree decl
, section
*section
)
7042 /* We can only be certain about the prevailing symbol definition. */
7043 if (!decl_binds_to_current_def_p (decl
))
7046 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7047 if (!DECL_SECTION_NAME (decl
))
7049 /* Make sure that we will not create a unique section for DECL. */
7050 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7054 return function_section (decl
) == section
;
7057 /* Return nonzero if a 32-bit "long_call" should be generated for
7058 a call from the current function to DECL. We generate a long_call
7061 a. has an __attribute__((long call))
7062 or b. is within the scope of a #pragma long_calls
7063 or c. the -mlong-calls command line switch has been specified
7065 However we do not generate a long call if the function:
7067 d. has an __attribute__ ((short_call))
7068 or e. is inside the scope of a #pragma no_long_calls
7069 or f. is defined in the same section as the current function. */
7072 arm_is_long_call_p (tree decl
)
7077 return TARGET_LONG_CALLS
;
7079 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7080 if (lookup_attribute ("short_call", attrs
))
7083 /* For "f", be conservative, and only cater for cases in which the
7084 whole of the current function is placed in the same section. */
7085 if (!flag_reorder_blocks_and_partition
7086 && TREE_CODE (decl
) == FUNCTION_DECL
7087 && arm_function_in_section_p (decl
, current_function_section ()))
7090 if (lookup_attribute ("long_call", attrs
))
7093 return TARGET_LONG_CALLS
;
7096 /* Return nonzero if it is ok to make a tail-call to DECL. */
7098 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7100 unsigned long func_type
;
7102 if (cfun
->machine
->sibcall_blocked
)
7105 /* Never tailcall something if we are generating code for Thumb-1. */
7109 /* The PIC register is live on entry to VxWorks PLT entries, so we
7110 must make the call before restoring the PIC register. */
7111 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7114 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7115 may be used both as target of the call and base register for restoring
7116 the VFP registers */
7117 if (TARGET_APCS_FRAME
&& TARGET_ARM
7118 && TARGET_HARD_FLOAT
7119 && decl
&& arm_is_long_call_p (decl
))
7122 /* If we are interworking and the function is not declared static
7123 then we can't tail-call it unless we know that it exists in this
7124 compilation unit (since it might be a Thumb routine). */
7125 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7126 && !TREE_ASM_WRITTEN (decl
))
7129 func_type
= arm_current_func_type ();
7130 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7131 if (IS_INTERRUPT (func_type
))
7134 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7135 generated for entry functions themselves. */
7136 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7139 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7140 this would complicate matters for later code generation. */
7141 if (TREE_CODE (exp
) == CALL_EXPR
)
7143 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7144 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7148 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7150 /* Check that the return value locations are the same. For
7151 example that we aren't returning a value from the sibling in
7152 a VFP register but then need to transfer it to a core
7155 tree decl_or_type
= decl
;
7157 /* If it is an indirect function pointer, get the function type. */
7159 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7161 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7162 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7164 if (!rtx_equal_p (a
, b
))
7168 /* Never tailcall if function may be called with a misaligned SP. */
7169 if (IS_STACKALIGN (func_type
))
7172 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7173 references should become a NOP. Don't convert such calls into
7175 if (TARGET_AAPCS_BASED
7176 && arm_abi
== ARM_ABI_AAPCS
7178 && DECL_WEAK (decl
))
7181 /* We cannot do a tailcall for an indirect call by descriptor if all the
7182 argument registers are used because the only register left to load the
7183 address is IP and it will already contain the static chain. */
7184 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7186 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7187 CUMULATIVE_ARGS cum
;
7188 cumulative_args_t cum_v
;
7190 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7191 cum_v
= pack_cumulative_args (&cum
);
7193 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7195 tree type
= TREE_VALUE (t
);
7196 if (!VOID_TYPE_P (type
))
7197 arm_function_arg_advance (cum_v
, TYPE_MODE (type
), type
, true);
7200 if (!arm_function_arg (cum_v
, SImode
, integer_type_node
, true))
7204 /* Everything else is ok. */
7209 /* Addressing mode support functions. */
7211 /* Return nonzero if X is a legitimate immediate operand when compiling
7212 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7214 legitimate_pic_operand_p (rtx x
)
7216 if (GET_CODE (x
) == SYMBOL_REF
7217 || (GET_CODE (x
) == CONST
7218 && GET_CODE (XEXP (x
, 0)) == PLUS
7219 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
7225 /* Record that the current function needs a PIC register. Initialize
7226 cfun->machine->pic_reg if we have not already done so. */
7229 require_pic_register (void)
7231 /* A lot of the logic here is made obscure by the fact that this
7232 routine gets called as part of the rtx cost estimation process.
7233 We don't want those calls to affect any assumptions about the real
7234 function; and further, we can't call entry_of_function() until we
7235 start the real expansion process. */
7236 if (!crtl
->uses_pic_offset_table
)
7238 gcc_assert (can_create_pseudo_p ());
7239 if (arm_pic_register
!= INVALID_REGNUM
7240 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7242 if (!cfun
->machine
->pic_reg
)
7243 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7245 /* Play games to avoid marking the function as needing pic
7246 if we are being called as part of the cost-estimation
7248 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7249 crtl
->uses_pic_offset_table
= 1;
7253 rtx_insn
*seq
, *insn
;
7255 if (!cfun
->machine
->pic_reg
)
7256 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
7258 /* Play games to avoid marking the function as needing pic
7259 if we are being called as part of the cost-estimation
7261 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7263 crtl
->uses_pic_offset_table
= 1;
7266 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
7267 && arm_pic_register
> LAST_LO_REGNUM
)
7268 emit_move_insn (cfun
->machine
->pic_reg
,
7269 gen_rtx_REG (Pmode
, arm_pic_register
));
7271 arm_load_pic_register (0UL);
7276 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
7278 INSN_LOCATION (insn
) = prologue_location
;
7280 /* We can be called during expansion of PHI nodes, where
7281 we can't yet emit instructions directly in the final
7282 insn stream. Queue the insns on the entry edge, they will
7283 be committed after everything else is expanded. */
7284 insert_insn_on_edge (seq
,
7285 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
7292 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
7294 if (GET_CODE (orig
) == SYMBOL_REF
7295 || GET_CODE (orig
) == LABEL_REF
)
7299 gcc_assert (can_create_pseudo_p ());
7300 reg
= gen_reg_rtx (Pmode
);
7303 /* VxWorks does not impose a fixed gap between segments; the run-time
7304 gap can be different from the object-file gap. We therefore can't
7305 use GOTOFF unless we are absolutely sure that the symbol is in the
7306 same segment as the GOT. Unfortunately, the flexibility of linker
7307 scripts means that we can't be sure of that in general, so assume
7308 that GOTOFF is never valid on VxWorks. */
7309 /* References to weak symbols cannot be resolved locally: they
7310 may be overridden by a non-weak definition at link time. */
7312 if ((GET_CODE (orig
) == LABEL_REF
7313 || (GET_CODE (orig
) == SYMBOL_REF
7314 && SYMBOL_REF_LOCAL_P (orig
)
7315 && (SYMBOL_REF_DECL (orig
)
7316 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)))
7318 && arm_pic_data_is_text_relative
)
7319 insn
= arm_pic_static_addr (orig
, reg
);
7325 /* If this function doesn't have a pic register, create one now. */
7326 require_pic_register ();
7328 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
7330 /* Make the MEM as close to a constant as possible. */
7331 mem
= SET_SRC (pat
);
7332 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
7333 MEM_READONLY_P (mem
) = 1;
7334 MEM_NOTRAP_P (mem
) = 1;
7336 insn
= emit_insn (pat
);
7339 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7341 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
7345 else if (GET_CODE (orig
) == CONST
)
7349 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7350 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
7353 /* Handle the case where we have: const (UNSPEC_TLS). */
7354 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
7355 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
7358 /* Handle the case where we have:
7359 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7361 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7362 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
7363 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
7365 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
7371 gcc_assert (can_create_pseudo_p ());
7372 reg
= gen_reg_rtx (Pmode
);
7375 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
7377 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
7378 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
7379 base
== reg
? 0 : reg
);
7381 if (CONST_INT_P (offset
))
7383 /* The base register doesn't really matter, we only want to
7384 test the index for the appropriate mode. */
7385 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
7387 gcc_assert (can_create_pseudo_p ());
7388 offset
= force_reg (Pmode
, offset
);
7391 if (CONST_INT_P (offset
))
7392 return plus_constant (Pmode
, base
, INTVAL (offset
));
7395 if (GET_MODE_SIZE (mode
) > 4
7396 && (GET_MODE_CLASS (mode
) == MODE_INT
7397 || TARGET_SOFT_FLOAT
))
7399 emit_insn (gen_addsi3 (reg
, base
, offset
));
7403 return gen_rtx_PLUS (Pmode
, base
, offset
);
7410 /* Find a spare register to use during the prolog of a function. */
7413 thumb_find_work_register (unsigned long pushed_regs_mask
)
7417 /* Check the argument registers first as these are call-used. The
7418 register allocation order means that sometimes r3 might be used
7419 but earlier argument registers might not, so check them all. */
7420 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7421 if (!df_regs_ever_live_p (reg
))
7424 /* Before going on to check the call-saved registers we can try a couple
7425 more ways of deducing that r3 is available. The first is when we are
7426 pushing anonymous arguments onto the stack and we have less than 4
7427 registers worth of fixed arguments(*). In this case r3 will be part of
7428 the variable argument list and so we can be sure that it will be
7429 pushed right at the start of the function. Hence it will be available
7430 for the rest of the prologue.
7431 (*): ie crtl->args.pretend_args_size is greater than 0. */
7432 if (cfun
->machine
->uses_anonymous_args
7433 && crtl
->args
.pretend_args_size
> 0)
7434 return LAST_ARG_REGNUM
;
7436 /* The other case is when we have fixed arguments but less than 4 registers
7437 worth. In this case r3 might be used in the body of the function, but
7438 it is not being used to convey an argument into the function. In theory
7439 we could just check crtl->args.size to see how many bytes are
7440 being passed in argument registers, but it seems that it is unreliable.
7441 Sometimes it will have the value 0 when in fact arguments are being
7442 passed. (See testcase execute/20021111-1.c for an example). So we also
7443 check the args_info.nregs field as well. The problem with this field is
7444 that it makes no allowances for arguments that are passed to the
7445 function but which are not used. Hence we could miss an opportunity
7446 when a function has an unused argument in r3. But it is better to be
7447 safe than to be sorry. */
7448 if (! cfun
->machine
->uses_anonymous_args
7449 && crtl
->args
.size
>= 0
7450 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7451 && (TARGET_AAPCS_BASED
7452 ? crtl
->args
.info
.aapcs_ncrn
< 4
7453 : crtl
->args
.info
.nregs
< 4))
7454 return LAST_ARG_REGNUM
;
7456 /* Otherwise look for a call-saved register that is going to be pushed. */
7457 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7458 if (pushed_regs_mask
& (1 << reg
))
7463 /* Thumb-2 can use high regs. */
7464 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7465 if (pushed_regs_mask
& (1 << reg
))
7468 /* Something went wrong - thumb_compute_save_reg_mask()
7469 should have arranged for a suitable register to be pushed. */
7473 static GTY(()) int pic_labelno
;
7475 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7479 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7481 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7483 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7486 gcc_assert (flag_pic
);
7488 pic_reg
= cfun
->machine
->pic_reg
;
7489 if (TARGET_VXWORKS_RTP
)
7491 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7492 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7493 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7495 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7497 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7498 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7502 /* We use an UNSPEC rather than a LABEL_REF because this label
7503 never appears in the code stream. */
7505 labelno
= GEN_INT (pic_labelno
++);
7506 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7507 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7509 /* On the ARM the PC register contains 'dot + 8' at the time of the
7510 addition, on the Thumb it is 'dot + 4'. */
7511 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7512 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7514 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7518 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7520 else /* TARGET_THUMB1 */
7522 if (arm_pic_register
!= INVALID_REGNUM
7523 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7525 /* We will have pushed the pic register, so we should always be
7526 able to find a work register. */
7527 pic_tmp
= gen_rtx_REG (SImode
,
7528 thumb_find_work_register (saved_regs
));
7529 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7530 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7531 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7533 else if (arm_pic_register
!= INVALID_REGNUM
7534 && arm_pic_register
> LAST_LO_REGNUM
7535 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7537 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7538 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7539 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7542 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7546 /* Need to emit this whether or not we obey regdecls,
7547 since setjmp/longjmp can cause life info to screw up. */
7551 /* Generate code to load the address of a static var when flag_pic is set. */
7553 arm_pic_static_addr (rtx orig
, rtx reg
)
7555 rtx l1
, labelno
, offset_rtx
;
7557 gcc_assert (flag_pic
);
7559 /* We use an UNSPEC rather than a LABEL_REF because this label
7560 never appears in the code stream. */
7561 labelno
= GEN_INT (pic_labelno
++);
7562 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7563 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7565 /* On the ARM the PC register contains 'dot + 8' at the time of the
7566 addition, on the Thumb it is 'dot + 4'. */
7567 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7568 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7569 UNSPEC_SYMBOL_OFFSET
);
7570 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7572 return emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7575 /* Return nonzero if X is valid as an ARM state addressing register. */
7577 arm_address_register_rtx_p (rtx x
, int strict_p
)
7587 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7589 return (regno
<= LAST_ARM_REGNUM
7590 || regno
>= FIRST_PSEUDO_REGISTER
7591 || regno
== FRAME_POINTER_REGNUM
7592 || regno
== ARG_POINTER_REGNUM
);
7595 /* Return TRUE if this rtx is the difference of a symbol and a label,
7596 and will reduce to a PC-relative relocation in the object file.
7597 Expressions like this can be left alone when generating PIC, rather
7598 than forced through the GOT. */
7600 pcrel_constant_p (rtx x
)
7602 if (GET_CODE (x
) == MINUS
)
7603 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7608 /* Return true if X will surely end up in an index register after next
7611 will_be_in_index_register (const_rtx x
)
7613 /* arm.md: calculate_pic_address will split this into a register. */
7614 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7617 /* Return nonzero if X is a valid ARM state address operand. */
7619 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7623 enum rtx_code code
= GET_CODE (x
);
7625 if (arm_address_register_rtx_p (x
, strict_p
))
7628 use_ldrd
= (TARGET_LDRD
7629 && (mode
== DImode
|| mode
== DFmode
));
7631 if (code
== POST_INC
|| code
== PRE_DEC
7632 || ((code
== PRE_INC
|| code
== POST_DEC
)
7633 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7634 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7636 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7637 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7638 && GET_CODE (XEXP (x
, 1)) == PLUS
7639 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7641 rtx addend
= XEXP (XEXP (x
, 1), 1);
7643 /* Don't allow ldrd post increment by register because it's hard
7644 to fixup invalid register choices. */
7646 && GET_CODE (x
) == POST_MODIFY
7650 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7651 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7654 /* After reload constants split into minipools will have addresses
7655 from a LABEL_REF. */
7656 else if (reload_completed
7657 && (code
== LABEL_REF
7659 && GET_CODE (XEXP (x
, 0)) == PLUS
7660 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7661 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7664 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7667 else if (code
== PLUS
)
7669 rtx xop0
= XEXP (x
, 0);
7670 rtx xop1
= XEXP (x
, 1);
7672 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7673 && ((CONST_INT_P (xop1
)
7674 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7675 || (!strict_p
&& will_be_in_index_register (xop1
))))
7676 || (arm_address_register_rtx_p (xop1
, strict_p
)
7677 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7681 /* Reload currently can't handle MINUS, so disable this for now */
7682 else if (GET_CODE (x
) == MINUS
)
7684 rtx xop0
= XEXP (x
, 0);
7685 rtx xop1
= XEXP (x
, 1);
7687 return (arm_address_register_rtx_p (xop0
, strict_p
)
7688 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7692 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7693 && code
== SYMBOL_REF
7694 && CONSTANT_POOL_ADDRESS_P (x
)
7696 && symbol_mentioned_p (get_pool_constant (x
))
7697 && ! pcrel_constant_p (get_pool_constant (x
))))
7703 /* Return nonzero if X is a valid Thumb-2 address operand. */
7705 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7708 enum rtx_code code
= GET_CODE (x
);
7710 if (arm_address_register_rtx_p (x
, strict_p
))
7713 use_ldrd
= (TARGET_LDRD
7714 && (mode
== DImode
|| mode
== DFmode
));
7716 if (code
== POST_INC
|| code
== PRE_DEC
7717 || ((code
== PRE_INC
|| code
== POST_DEC
)
7718 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7719 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7721 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7722 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7723 && GET_CODE (XEXP (x
, 1)) == PLUS
7724 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7726 /* Thumb-2 only has autoincrement by constant. */
7727 rtx addend
= XEXP (XEXP (x
, 1), 1);
7728 HOST_WIDE_INT offset
;
7730 if (!CONST_INT_P (addend
))
7733 offset
= INTVAL(addend
);
7734 if (GET_MODE_SIZE (mode
) <= 4)
7735 return (offset
> -256 && offset
< 256);
7737 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7738 && (offset
& 3) == 0);
7741 /* After reload constants split into minipools will have addresses
7742 from a LABEL_REF. */
7743 else if (reload_completed
7744 && (code
== LABEL_REF
7746 && GET_CODE (XEXP (x
, 0)) == PLUS
7747 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7748 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7751 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7754 else if (code
== PLUS
)
7756 rtx xop0
= XEXP (x
, 0);
7757 rtx xop1
= XEXP (x
, 1);
7759 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7760 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7761 || (!strict_p
&& will_be_in_index_register (xop1
))))
7762 || (arm_address_register_rtx_p (xop1
, strict_p
)
7763 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7766 /* Normally we can assign constant values to target registers without
7767 the help of constant pool. But there are cases we have to use constant
7769 1) assign a label to register.
7770 2) sign-extend a 8bit value to 32bit and then assign to register.
7772 Constant pool access in format:
7773 (set (reg r0) (mem (symbol_ref (".LC0"))))
7774 will cause the use of literal pool (later in function arm_reorg).
7775 So here we mark such format as an invalid format, then the compiler
7776 will adjust it into:
7777 (set (reg r0) (symbol_ref (".LC0")))
7778 (set (reg r0) (mem (reg r0))).
7779 No extra register is required, and (mem (reg r0)) won't cause the use
7780 of literal pools. */
7781 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
7782 && CONSTANT_POOL_ADDRESS_P (x
))
7785 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7786 && code
== SYMBOL_REF
7787 && CONSTANT_POOL_ADDRESS_P (x
)
7789 && symbol_mentioned_p (get_pool_constant (x
))
7790 && ! pcrel_constant_p (get_pool_constant (x
))))
7796 /* Return nonzero if INDEX is valid for an address index operand in
7799 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7802 HOST_WIDE_INT range
;
7803 enum rtx_code code
= GET_CODE (index
);
7805 /* Standard coprocessor addressing modes. */
7806 if (TARGET_HARD_FLOAT
7807 && (mode
== SFmode
|| mode
== DFmode
))
7808 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7809 && INTVAL (index
) > -1024
7810 && (INTVAL (index
) & 3) == 0);
7812 /* For quad modes, we restrict the constant offset to be slightly less
7813 than what the instruction format permits. We do this because for
7814 quad mode moves, we will actually decompose them into two separate
7815 double-mode reads or writes. INDEX must therefore be a valid
7816 (double-mode) offset and so should INDEX+8. */
7817 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7818 return (code
== CONST_INT
7819 && INTVAL (index
) < 1016
7820 && INTVAL (index
) > -1024
7821 && (INTVAL (index
) & 3) == 0);
7823 /* We have no such constraint on double mode offsets, so we permit the
7824 full range of the instruction format. */
7825 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7826 return (code
== CONST_INT
7827 && INTVAL (index
) < 1024
7828 && INTVAL (index
) > -1024
7829 && (INTVAL (index
) & 3) == 0);
7831 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7832 return (code
== CONST_INT
7833 && INTVAL (index
) < 1024
7834 && INTVAL (index
) > -1024
7835 && (INTVAL (index
) & 3) == 0);
7837 if (arm_address_register_rtx_p (index
, strict_p
)
7838 && (GET_MODE_SIZE (mode
) <= 4))
7841 if (mode
== DImode
|| mode
== DFmode
)
7843 if (code
== CONST_INT
)
7845 HOST_WIDE_INT val
= INTVAL (index
);
7848 return val
> -256 && val
< 256;
7850 return val
> -4096 && val
< 4092;
7853 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7856 if (GET_MODE_SIZE (mode
) <= 4
7860 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7864 rtx xiop0
= XEXP (index
, 0);
7865 rtx xiop1
= XEXP (index
, 1);
7867 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7868 && power_of_two_operand (xiop1
, SImode
))
7869 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7870 && power_of_two_operand (xiop0
, SImode
)));
7872 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7873 || code
== ASHIFT
|| code
== ROTATERT
)
7875 rtx op
= XEXP (index
, 1);
7877 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7880 && INTVAL (op
) <= 31);
7884 /* For ARM v4 we may be doing a sign-extend operation during the
7890 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7896 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7898 return (code
== CONST_INT
7899 && INTVAL (index
) < range
7900 && INTVAL (index
) > -range
);
7903 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7904 index operand. i.e. 1, 2, 4 or 8. */
7906 thumb2_index_mul_operand (rtx op
)
7910 if (!CONST_INT_P (op
))
7914 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7917 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7919 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7921 enum rtx_code code
= GET_CODE (index
);
7923 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7924 /* Standard coprocessor addressing modes. */
7925 if (TARGET_HARD_FLOAT
7926 && (mode
== SFmode
|| mode
== DFmode
))
7927 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7928 /* Thumb-2 allows only > -256 index range for it's core register
7929 load/stores. Since we allow SF/DF in core registers, we have
7930 to use the intersection between -256~4096 (core) and -1024~1024
7932 && INTVAL (index
) > -256
7933 && (INTVAL (index
) & 3) == 0);
7935 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7937 /* For DImode assume values will usually live in core regs
7938 and only allow LDRD addressing modes. */
7939 if (!TARGET_LDRD
|| mode
!= DImode
)
7940 return (code
== CONST_INT
7941 && INTVAL (index
) < 1024
7942 && INTVAL (index
) > -1024
7943 && (INTVAL (index
) & 3) == 0);
7946 /* For quad modes, we restrict the constant offset to be slightly less
7947 than what the instruction format permits. We do this because for
7948 quad mode moves, we will actually decompose them into two separate
7949 double-mode reads or writes. INDEX must therefore be a valid
7950 (double-mode) offset and so should INDEX+8. */
7951 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7952 return (code
== CONST_INT
7953 && INTVAL (index
) < 1016
7954 && INTVAL (index
) > -1024
7955 && (INTVAL (index
) & 3) == 0);
7957 /* We have no such constraint on double mode offsets, so we permit the
7958 full range of the instruction format. */
7959 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7960 return (code
== CONST_INT
7961 && INTVAL (index
) < 1024
7962 && INTVAL (index
) > -1024
7963 && (INTVAL (index
) & 3) == 0);
7965 if (arm_address_register_rtx_p (index
, strict_p
)
7966 && (GET_MODE_SIZE (mode
) <= 4))
7969 if (mode
== DImode
|| mode
== DFmode
)
7971 if (code
== CONST_INT
)
7973 HOST_WIDE_INT val
= INTVAL (index
);
7974 /* ??? Can we assume ldrd for thumb2? */
7975 /* Thumb-2 ldrd only has reg+const addressing modes. */
7976 /* ldrd supports offsets of +-1020.
7977 However the ldr fallback does not. */
7978 return val
> -256 && val
< 256 && (val
& 3) == 0;
7986 rtx xiop0
= XEXP (index
, 0);
7987 rtx xiop1
= XEXP (index
, 1);
7989 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7990 && thumb2_index_mul_operand (xiop1
))
7991 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7992 && thumb2_index_mul_operand (xiop0
)));
7994 else if (code
== ASHIFT
)
7996 rtx op
= XEXP (index
, 1);
7998 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8001 && INTVAL (op
) <= 3);
8004 return (code
== CONST_INT
8005 && INTVAL (index
) < 4096
8006 && INTVAL (index
) > -256);
8009 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8011 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8021 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8023 return (regno
<= LAST_LO_REGNUM
8024 || regno
> LAST_VIRTUAL_REGISTER
8025 || regno
== FRAME_POINTER_REGNUM
8026 || (GET_MODE_SIZE (mode
) >= 4
8027 && (regno
== STACK_POINTER_REGNUM
8028 || regno
>= FIRST_PSEUDO_REGISTER
8029 || x
== hard_frame_pointer_rtx
8030 || x
== arg_pointer_rtx
)));
8033 /* Return nonzero if x is a legitimate index register. This is the case
8034 for any base register that can access a QImode object. */
8036 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8038 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
8041 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8043 The AP may be eliminated to either the SP or the FP, so we use the
8044 least common denominator, e.g. SImode, and offsets from 0 to 64.
8046 ??? Verify whether the above is the right approach.
8048 ??? Also, the FP may be eliminated to the SP, so perhaps that
8049 needs special handling also.
8051 ??? Look at how the mips16 port solves this problem. It probably uses
8052 better ways to solve some of these problems.
8054 Although it is not incorrect, we don't accept QImode and HImode
8055 addresses based on the frame pointer or arg pointer until the
8056 reload pass starts. This is so that eliminating such addresses
8057 into stack based ones won't produce impossible code. */
8059 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8061 /* ??? Not clear if this is right. Experiment. */
8062 if (GET_MODE_SIZE (mode
) < 4
8063 && !(reload_in_progress
|| reload_completed
)
8064 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8065 || reg_mentioned_p (arg_pointer_rtx
, x
)
8066 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8067 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8068 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8069 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8072 /* Accept any base register. SP only in SImode or larger. */
8073 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8076 /* This is PC relative data before arm_reorg runs. */
8077 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
8078 && GET_CODE (x
) == SYMBOL_REF
8079 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
8082 /* This is PC relative data after arm_reorg runs. */
8083 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
8085 && (GET_CODE (x
) == LABEL_REF
8086 || (GET_CODE (x
) == CONST
8087 && GET_CODE (XEXP (x
, 0)) == PLUS
8088 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8089 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8092 /* Post-inc indexing only supported for SImode and larger. */
8093 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
8094 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
8097 else if (GET_CODE (x
) == PLUS
)
8099 /* REG+REG address can be any two index registers. */
8100 /* We disallow FRAME+REG addressing since we know that FRAME
8101 will be replaced with STACK, and SP relative addressing only
8102 permits SP+OFFSET. */
8103 if (GET_MODE_SIZE (mode
) <= 4
8104 && XEXP (x
, 0) != frame_pointer_rtx
8105 && XEXP (x
, 1) != frame_pointer_rtx
8106 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8107 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
8108 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
8111 /* REG+const has 5-7 bit offset for non-SP registers. */
8112 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8113 || XEXP (x
, 0) == arg_pointer_rtx
)
8114 && CONST_INT_P (XEXP (x
, 1))
8115 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8118 /* REG+const has 10-bit offset for SP, but only SImode and
8119 larger is supported. */
8120 /* ??? Should probably check for DI/DFmode overflow here
8121 just like GO_IF_LEGITIMATE_OFFSET does. */
8122 else if (REG_P (XEXP (x
, 0))
8123 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
8124 && GET_MODE_SIZE (mode
) >= 4
8125 && CONST_INT_P (XEXP (x
, 1))
8126 && INTVAL (XEXP (x
, 1)) >= 0
8127 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
8128 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8131 else if (REG_P (XEXP (x
, 0))
8132 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
8133 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
8134 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
8135 && REGNO (XEXP (x
, 0))
8136 <= LAST_VIRTUAL_POINTER_REGISTER
))
8137 && GET_MODE_SIZE (mode
) >= 4
8138 && CONST_INT_P (XEXP (x
, 1))
8139 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8143 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8144 && GET_MODE_SIZE (mode
) == 4
8145 && GET_CODE (x
) == SYMBOL_REF
8146 && CONSTANT_POOL_ADDRESS_P (x
)
8148 && symbol_mentioned_p (get_pool_constant (x
))
8149 && ! pcrel_constant_p (get_pool_constant (x
))))
8155 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8156 instruction of mode MODE. */
8158 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
8160 switch (GET_MODE_SIZE (mode
))
8163 return val
>= 0 && val
< 32;
8166 return val
>= 0 && val
< 64 && (val
& 1) == 0;
8170 && (val
+ GET_MODE_SIZE (mode
)) <= 128
8176 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
8179 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
8180 else if (TARGET_THUMB2
)
8181 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
8182 else /* if (TARGET_THUMB1) */
8183 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
8186 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8188 Given an rtx X being reloaded into a reg required to be
8189 in class CLASS, return the class of reg to actually use.
8190 In general this is just CLASS, but for the Thumb core registers and
8191 immediate constants we prefer a LO_REGS class or a subset. */
8194 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
8200 if (rclass
== GENERAL_REGS
)
8207 /* Build the SYMBOL_REF for __tls_get_addr. */
8209 static GTY(()) rtx tls_get_addr_libfunc
;
8212 get_tls_get_addr (void)
8214 if (!tls_get_addr_libfunc
)
8215 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
8216 return tls_get_addr_libfunc
;
8220 arm_load_tp (rtx target
)
8223 target
= gen_reg_rtx (SImode
);
8227 /* Can return in any reg. */
8228 emit_insn (gen_load_tp_hard (target
));
8232 /* Always returned in r0. Immediately copy the result into a pseudo,
8233 otherwise other uses of r0 (e.g. setting up function arguments) may
8234 clobber the value. */
8238 emit_insn (gen_load_tp_soft ());
8240 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
8241 emit_move_insn (target
, tmp
);
8247 load_tls_operand (rtx x
, rtx reg
)
8251 if (reg
== NULL_RTX
)
8252 reg
= gen_reg_rtx (SImode
);
8254 tmp
= gen_rtx_CONST (SImode
, x
);
8256 emit_move_insn (reg
, tmp
);
8262 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
8264 rtx label
, labelno
, sum
;
8266 gcc_assert (reloc
!= TLS_DESCSEQ
);
8269 labelno
= GEN_INT (pic_labelno
++);
8270 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8271 label
= gen_rtx_CONST (VOIDmode
, label
);
8273 sum
= gen_rtx_UNSPEC (Pmode
,
8274 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
8275 GEN_INT (TARGET_ARM
? 8 : 4)),
8277 reg
= load_tls_operand (sum
, reg
);
8280 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
8282 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8284 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
8285 LCT_PURE
, /* LCT_CONST? */
8286 Pmode
, 1, reg
, Pmode
);
8288 rtx_insn
*insns
= get_insns ();
8295 arm_tls_descseq_addr (rtx x
, rtx reg
)
8297 rtx labelno
= GEN_INT (pic_labelno
++);
8298 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8299 rtx sum
= gen_rtx_UNSPEC (Pmode
,
8300 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
8301 gen_rtx_CONST (VOIDmode
, label
),
8302 GEN_INT (!TARGET_ARM
)),
8304 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
8306 emit_insn (gen_tlscall (x
, labelno
));
8308 reg
= gen_reg_rtx (SImode
);
8310 gcc_assert (REGNO (reg
) != R0_REGNUM
);
8312 emit_move_insn (reg
, reg0
);
8318 legitimize_tls_address (rtx x
, rtx reg
)
8320 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
8322 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
8326 case TLS_MODEL_GLOBAL_DYNAMIC
:
8327 if (TARGET_GNU2_TLS
)
8329 reg
= arm_tls_descseq_addr (x
, reg
);
8331 tp
= arm_load_tp (NULL_RTX
);
8333 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8337 /* Original scheme */
8338 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
8339 dest
= gen_reg_rtx (Pmode
);
8340 emit_libcall_block (insns
, dest
, ret
, x
);
8344 case TLS_MODEL_LOCAL_DYNAMIC
:
8345 if (TARGET_GNU2_TLS
)
8347 reg
= arm_tls_descseq_addr (x
, reg
);
8349 tp
= arm_load_tp (NULL_RTX
);
8351 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8355 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
8357 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8358 share the LDM result with other LD model accesses. */
8359 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
8361 dest
= gen_reg_rtx (Pmode
);
8362 emit_libcall_block (insns
, dest
, ret
, eqv
);
8364 /* Load the addend. */
8365 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
8366 GEN_INT (TLS_LDO32
)),
8368 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
8369 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
8373 case TLS_MODEL_INITIAL_EXEC
:
8374 labelno
= GEN_INT (pic_labelno
++);
8375 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8376 label
= gen_rtx_CONST (VOIDmode
, label
);
8377 sum
= gen_rtx_UNSPEC (Pmode
,
8378 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
8379 GEN_INT (TARGET_ARM
? 8 : 4)),
8381 reg
= load_tls_operand (sum
, reg
);
8384 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
8385 else if (TARGET_THUMB2
)
8386 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
8389 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8390 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8393 tp
= arm_load_tp (NULL_RTX
);
8395 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8397 case TLS_MODEL_LOCAL_EXEC
:
8398 tp
= arm_load_tp (NULL_RTX
);
8400 reg
= gen_rtx_UNSPEC (Pmode
,
8401 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8403 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8405 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8412 /* Try machine-dependent ways of modifying an illegitimate address
8413 to be legitimate. If we find one, return the new, valid address. */
8415 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8417 if (arm_tls_referenced_p (x
))
8421 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8423 addend
= XEXP (XEXP (x
, 0), 1);
8424 x
= XEXP (XEXP (x
, 0), 0);
8427 if (GET_CODE (x
) != SYMBOL_REF
)
8430 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8432 x
= legitimize_tls_address (x
, NULL_RTX
);
8436 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8445 /* TODO: legitimize_address for Thumb2. */
8448 return thumb_legitimize_address (x
, orig_x
, mode
);
8451 if (GET_CODE (x
) == PLUS
)
8453 rtx xop0
= XEXP (x
, 0);
8454 rtx xop1
= XEXP (x
, 1);
8456 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8457 xop0
= force_reg (SImode
, xop0
);
8459 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8460 && !symbol_mentioned_p (xop1
))
8461 xop1
= force_reg (SImode
, xop1
);
8463 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8464 && CONST_INT_P (xop1
))
8466 HOST_WIDE_INT n
, low_n
;
8470 /* VFP addressing modes actually allow greater offsets, but for
8471 now we just stick with the lowest common denominator. */
8472 if (mode
== DImode
|| mode
== DFmode
)
8484 low_n
= ((mode
) == TImode
? 0
8485 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8489 base_reg
= gen_reg_rtx (SImode
);
8490 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8491 emit_move_insn (base_reg
, val
);
8492 x
= plus_constant (Pmode
, base_reg
, low_n
);
8494 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8495 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8498 /* XXX We don't allow MINUS any more -- see comment in
8499 arm_legitimate_address_outer_p (). */
8500 else if (GET_CODE (x
) == MINUS
)
8502 rtx xop0
= XEXP (x
, 0);
8503 rtx xop1
= XEXP (x
, 1);
8505 if (CONSTANT_P (xop0
))
8506 xop0
= force_reg (SImode
, xop0
);
8508 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8509 xop1
= force_reg (SImode
, xop1
);
8511 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8512 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8515 /* Make sure to take full advantage of the pre-indexed addressing mode
8516 with absolute addresses which often allows for the base register to
8517 be factorized for multiple adjacent memory references, and it might
8518 even allows for the mini pool to be avoided entirely. */
8519 else if (CONST_INT_P (x
) && optimize
> 0)
8522 HOST_WIDE_INT mask
, base
, index
;
8525 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8526 use a 8-bit index. So let's use a 12-bit index for SImode only and
8527 hope that arm_gen_constant will enable ldrb to use more bits. */
8528 bits
= (mode
== SImode
) ? 12 : 8;
8529 mask
= (1 << bits
) - 1;
8530 base
= INTVAL (x
) & ~mask
;
8531 index
= INTVAL (x
) & mask
;
8532 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8534 /* It'll most probably be more efficient to generate the base
8535 with more bits set and use a negative index instead. */
8539 base_reg
= force_reg (SImode
, GEN_INT (base
));
8540 x
= plus_constant (Pmode
, base_reg
, index
);
8545 /* We need to find and carefully transform any SYMBOL and LABEL
8546 references; so go back to the original address expression. */
8547 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8549 if (new_x
!= orig_x
)
8557 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8558 to be legitimate. If we find one, return the new, valid address. */
8560 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8562 if (GET_CODE (x
) == PLUS
8563 && CONST_INT_P (XEXP (x
, 1))
8564 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8565 || INTVAL (XEXP (x
, 1)) < 0))
8567 rtx xop0
= XEXP (x
, 0);
8568 rtx xop1
= XEXP (x
, 1);
8569 HOST_WIDE_INT offset
= INTVAL (xop1
);
8571 /* Try and fold the offset into a biasing of the base register and
8572 then offsetting that. Don't do this when optimizing for space
8573 since it can cause too many CSEs. */
8574 if (optimize_size
&& offset
>= 0
8575 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8577 HOST_WIDE_INT delta
;
8580 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8581 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8582 delta
= 31 * GET_MODE_SIZE (mode
);
8584 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8586 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8588 x
= plus_constant (Pmode
, xop0
, delta
);
8590 else if (offset
< 0 && offset
> -256)
8591 /* Small negative offsets are best done with a subtract before the
8592 dereference, forcing these into a register normally takes two
8594 x
= force_operand (x
, NULL_RTX
);
8597 /* For the remaining cases, force the constant into a register. */
8598 xop1
= force_reg (SImode
, xop1
);
8599 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8602 else if (GET_CODE (x
) == PLUS
8603 && s_register_operand (XEXP (x
, 1), SImode
)
8604 && !s_register_operand (XEXP (x
, 0), SImode
))
8606 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8608 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8613 /* We need to find and carefully transform any SYMBOL and LABEL
8614 references; so go back to the original address expression. */
8615 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8617 if (new_x
!= orig_x
)
8624 /* Return TRUE if X contains any TLS symbol references. */
8627 arm_tls_referenced_p (rtx x
)
8629 if (! TARGET_HAVE_TLS
)
8632 subrtx_iterator::array_type array
;
8633 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8635 const_rtx x
= *iter
;
8636 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8639 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8640 TLS offsets, not real symbol references. */
8641 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8642 iter
.skip_subrtxes ();
8647 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8649 On the ARM, allow any integer (invalid ones are removed later by insn
8650 patterns), nice doubles and symbol_refs which refer to the function's
8653 When generating pic allow anything. */
8656 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8658 return flag_pic
|| !label_mentioned_p (x
);
8662 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8664 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8665 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8666 for ARMv8-M Baseline or later the result is valid. */
8667 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8670 return (CONST_INT_P (x
)
8671 || CONST_DOUBLE_P (x
)
8672 || CONSTANT_ADDRESS_P (x
)
8677 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8679 return (!arm_cannot_force_const_mem (mode
, x
)
8681 ? arm_legitimate_constant_p_1 (mode
, x
)
8682 : thumb_legitimate_constant_p (mode
, x
)));
8685 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8688 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8692 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8694 split_const (x
, &base
, &offset
);
8695 if (GET_CODE (base
) == SYMBOL_REF
8696 && !offset_within_block_p (base
, INTVAL (offset
)))
8699 return arm_tls_referenced_p (x
);
/* True if X is a REG or a SUBREG of a REG.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* Strip a SUBREG, yielding the underlying REG.  */
#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
8710 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8712 machine_mode mode
= GET_MODE (x
);
8721 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8728 return COSTS_N_INSNS (1);
8731 if (CONST_INT_P (XEXP (x
, 1)))
8734 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8741 return COSTS_N_INSNS (2) + cycles
;
8743 return COSTS_N_INSNS (1) + 16;
8746 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8748 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8749 return (COSTS_N_INSNS (words
)
8750 + 4 * ((MEM_P (SET_SRC (x
)))
8751 + MEM_P (SET_DEST (x
))));
8756 if (UINTVAL (x
) < 256
8757 /* 16-bit constant. */
8758 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8760 if (thumb_shiftable_const (INTVAL (x
)))
8761 return COSTS_N_INSNS (2);
8762 return COSTS_N_INSNS (3);
8764 else if ((outer
== PLUS
|| outer
== COMPARE
)
8765 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8767 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8768 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8769 return COSTS_N_INSNS (1);
8770 else if (outer
== AND
)
8773 /* This duplicates the tests in the andsi3 expander. */
8774 for (i
= 9; i
<= 31; i
++)
8775 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8776 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8777 return COSTS_N_INSNS (2);
8779 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8780 || outer
== LSHIFTRT
)
8782 return COSTS_N_INSNS (2);
8788 return COSTS_N_INSNS (3);
8806 /* XXX another guess. */
8807 /* Memory costs quite a lot for the first word, but subsequent words
8808 load at the equivalent of a single insn each. */
8809 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8810 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8815 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8821 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8822 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8828 return total
+ COSTS_N_INSNS (1);
8830 /* Assume a two-shift sequence. Increase the cost slightly so
8831 we prefer actual shifts over an extend operation. */
8832 return total
+ 1 + COSTS_N_INSNS (2);
8839 /* Estimates the size cost of thumb1 instructions.
8840 For now most of the code is copied from thumb1_rtx_costs. We need more
8841 fine grain tuning when we have more related test cases. */
8843 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8845 machine_mode mode
= GET_MODE (x
);
8854 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8858 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8859 defined by RTL expansion, especially for the expansion of
8861 if ((GET_CODE (XEXP (x
, 0)) == MULT
8862 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8863 || (GET_CODE (XEXP (x
, 1)) == MULT
8864 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8865 return COSTS_N_INSNS (2);
8870 return COSTS_N_INSNS (1);
8873 if (CONST_INT_P (XEXP (x
, 1)))
8875 /* Thumb1 mul instruction can't operate on const. We must Load it
8876 into a register first. */
8877 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8878 /* For the targets which have a very small and high-latency multiply
8879 unit, we prefer to synthesize the mult with up to 5 instructions,
8880 giving a good balance between size and performance. */
8881 if (arm_arch6m
&& arm_m_profile_small_mul
)
8882 return COSTS_N_INSNS (5);
8884 return COSTS_N_INSNS (1) + const_size
;
8886 return COSTS_N_INSNS (1);
8889 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8891 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8892 cost
= COSTS_N_INSNS (words
);
8893 if (satisfies_constraint_J (SET_SRC (x
))
8894 || satisfies_constraint_K (SET_SRC (x
))
8895 /* Too big an immediate for a 2-byte mov, using MOVT. */
8896 || (CONST_INT_P (SET_SRC (x
))
8897 && UINTVAL (SET_SRC (x
)) >= 256
8899 && satisfies_constraint_j (SET_SRC (x
)))
8900 /* thumb1_movdi_insn. */
8901 || ((words
> 1) && MEM_P (SET_SRC (x
))))
8902 cost
+= COSTS_N_INSNS (1);
8908 if (UINTVAL (x
) < 256)
8909 return COSTS_N_INSNS (1);
8910 /* movw is 4byte long. */
8911 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
8912 return COSTS_N_INSNS (2);
8913 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8914 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8915 return COSTS_N_INSNS (2);
8916 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8917 if (thumb_shiftable_const (INTVAL (x
)))
8918 return COSTS_N_INSNS (2);
8919 return COSTS_N_INSNS (3);
8921 else if ((outer
== PLUS
|| outer
== COMPARE
)
8922 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8924 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8925 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8926 return COSTS_N_INSNS (1);
8927 else if (outer
== AND
)
8930 /* This duplicates the tests in the andsi3 expander. */
8931 for (i
= 9; i
<= 31; i
++)
8932 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8933 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8934 return COSTS_N_INSNS (2);
8936 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8937 || outer
== LSHIFTRT
)
8939 return COSTS_N_INSNS (2);
8945 return COSTS_N_INSNS (3);
8959 return COSTS_N_INSNS (1);
8962 return (COSTS_N_INSNS (1)
8964 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8965 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8966 ? COSTS_N_INSNS (1) : 0));
8970 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8975 /* XXX still guessing. */
8976 switch (GET_MODE (XEXP (x
, 0)))
8979 return (1 + (mode
== DImode
? 4 : 0)
8980 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8983 return (4 + (mode
== DImode
? 4 : 0)
8984 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8987 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8998 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8999 operand, then return the operand that is being shifted. If the shift
9000 is not by a constant, then set SHIFT_REG to point to the operand.
9001 Return NULL if OP is not a shifter operand. */
9003 shifter_op_p (rtx op
, rtx
*shift_reg
)
9005 enum rtx_code code
= GET_CODE (op
);
9007 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9008 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9009 return XEXP (op
, 0);
9010 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9011 return XEXP (op
, 0);
9012 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9013 || code
== ASHIFTRT
)
9015 if (!CONST_INT_P (XEXP (op
, 1)))
9016 *shift_reg
= XEXP (op
, 1);
9017 return XEXP (op
, 0);
9024 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9026 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9027 rtx_code code
= GET_CODE (x
);
9028 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9030 switch (XINT (x
, 1))
9032 case UNSPEC_UNALIGNED_LOAD
:
9033 /* We can only do unaligned loads into the integer unit, and we can't
9035 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9037 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9038 + extra_cost
->ldst
.load_unaligned
);
9041 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9042 ADDR_SPACE_GENERIC
, speed_p
);
9046 case UNSPEC_UNALIGNED_STORE
:
9047 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9049 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9050 + extra_cost
->ldst
.store_unaligned
);
9052 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9054 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9055 ADDR_SPACE_GENERIC
, speed_p
);
9066 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9070 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

/* If operand IDX of X is a left-shift-like shifter operand, account for
   a combined arith+shift insn and return from the enclosing function.
   Relies on locals shift_op/shift_reg/cost/speed_p/extra_cost of the
   caller (arm_rtx_costs_internal).  */
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
9107 /* RTX costs. Make an estimate of the cost of executing the operation
9108 X, which is contained with an operation with code OUTER_CODE.
9109 SPEED_P indicates whether the cost desired is the performance cost,
9110 or the size cost. The estimate is stored in COST and the return
9111 value is TRUE if the cost calculation is final, or FALSE if the
9112 caller should recurse through the operands of X to add additional
9115 We currently make no attempt to model the size savings of Thumb-2
9116 16-bit instructions. At the normal points in compilation where
9117 this code is called we have no measure of whether the condition
9118 flags are live or not, and thus no realistic way to determine what
9119 the size will eventually be. */
9121 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9122 const struct cpu_cost_table
*extra_cost
,
9123 int *cost
, bool speed_p
)
9125 machine_mode mode
= GET_MODE (x
);
9127 *cost
= COSTS_N_INSNS (1);
9132 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9134 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9142 /* SET RTXs don't have a mode so we get it from the destination. */
9143 mode
= GET_MODE (SET_DEST (x
));
9145 if (REG_P (SET_SRC (x
))
9146 && REG_P (SET_DEST (x
)))
9148 /* Assume that most copies can be done with a single insn,
9149 unless we don't have HW FP, in which case everything
9150 larger than word mode will require two insns. */
9151 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9152 && GET_MODE_SIZE (mode
) > 4)
9155 /* Conditional register moves can be encoded
9156 in 16 bits in Thumb mode. */
9157 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9163 if (CONST_INT_P (SET_SRC (x
)))
9165 /* Handle CONST_INT here, since the value doesn't have a mode
9166 and we would otherwise be unable to work out the true cost. */
9167 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9170 /* Slightly lower the cost of setting a core reg to a constant.
9171 This helps break up chains and allows for better scheduling. */
9172 if (REG_P (SET_DEST (x
))
9173 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9176 /* Immediate moves with an immediate in the range [0, 255] can be
9177 encoded in 16 bits in Thumb mode. */
9178 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9179 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9181 goto const_int_cost
;
9187 /* A memory access costs 1 insn if the mode is small, or the address is
9188 a single register, otherwise it costs one insn per word. */
9189 if (REG_P (XEXP (x
, 0)))
9190 *cost
= COSTS_N_INSNS (1);
9192 && GET_CODE (XEXP (x
, 0)) == PLUS
9193 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9194 /* This will be split into two instructions.
9195 See arm.md:calculate_pic_address. */
9196 *cost
= COSTS_N_INSNS (2);
9198 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9200 /* For speed optimizations, add the costs of the address and
9201 accessing memory. */
9204 *cost
+= (extra_cost
->ldst
.load
9205 + arm_address_cost (XEXP (x
, 0), mode
,
9206 ADDR_SPACE_GENERIC
, speed_p
));
9208 *cost
+= extra_cost
->ldst
.load
;
9214 /* Calculations of LDM costs are complex. We assume an initial cost
9215 (ldm_1st) which will load the number of registers mentioned in
9216 ldm_regs_per_insn_1st registers; then each additional
9217 ldm_regs_per_insn_subsequent registers cost one more insn. The
9218 formula for N regs is thus:
9220 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9221 + ldm_regs_per_insn_subsequent - 1)
9222 / ldm_regs_per_insn_subsequent).
9224 Additional costs may also be added for addressing. A similar
9225 formula is used for STM. */
9227 bool is_ldm
= load_multiple_operation (x
, SImode
);
9228 bool is_stm
= store_multiple_operation (x
, SImode
);
9230 if (is_ldm
|| is_stm
)
9234 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9235 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9236 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9237 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9238 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9239 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9240 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9242 *cost
+= regs_per_insn_1st
9243 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9244 + regs_per_insn_sub
- 1)
9245 / regs_per_insn_sub
);
9254 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9255 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9256 *cost
+= COSTS_N_INSNS (speed_p
9257 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9258 else if (mode
== SImode
&& TARGET_IDIV
)
9259 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9261 *cost
= LIBCALL_COST (2);
9262 return false; /* All arguments must be in registers. */
9265 /* MOD by a power of 2 can be expanded as:
9267 and r0, r0, #(n - 1)
9268 and r1, r1, #(n - 1)
9269 rsbpl r0, r1, #0. */
9270 if (CONST_INT_P (XEXP (x
, 1))
9271 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9274 *cost
+= COSTS_N_INSNS (3);
9277 *cost
+= 2 * extra_cost
->alu
.logical
9278 + extra_cost
->alu
.arith
;
9284 *cost
= LIBCALL_COST (2);
9285 return false; /* All arguments must be in registers. */
9288 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9290 *cost
+= (COSTS_N_INSNS (1)
9291 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9293 *cost
+= extra_cost
->alu
.shift_reg
;
9301 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9303 *cost
+= (COSTS_N_INSNS (2)
9304 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9306 *cost
+= 2 * extra_cost
->alu
.shift
;
9309 else if (mode
== SImode
)
9311 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9312 /* Slightly disparage register shifts at -Os, but not by much. */
9313 if (!CONST_INT_P (XEXP (x
, 1)))
9314 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9315 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9318 else if (GET_MODE_CLASS (mode
) == MODE_INT
9319 && GET_MODE_SIZE (mode
) < 4)
9323 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9324 /* Slightly disparage register shifts at -Os, but not by
9326 if (!CONST_INT_P (XEXP (x
, 1)))
9327 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9328 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9330 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9332 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9334 /* Can use SBFX/UBFX. */
9336 *cost
+= extra_cost
->alu
.bfx
;
9337 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9341 *cost
+= COSTS_N_INSNS (1);
9342 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9345 if (CONST_INT_P (XEXP (x
, 1)))
9346 *cost
+= 2 * extra_cost
->alu
.shift
;
9348 *cost
+= (extra_cost
->alu
.shift
9349 + extra_cost
->alu
.shift_reg
);
9352 /* Slightly disparage register shifts. */
9353 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9358 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9359 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9362 if (CONST_INT_P (XEXP (x
, 1)))
9363 *cost
+= (2 * extra_cost
->alu
.shift
9364 + extra_cost
->alu
.log_shift
);
9366 *cost
+= (extra_cost
->alu
.shift
9367 + extra_cost
->alu
.shift_reg
9368 + extra_cost
->alu
.log_shift_reg
);
9374 *cost
= LIBCALL_COST (2);
9383 *cost
+= extra_cost
->alu
.rev
;
9390 /* No rev instruction available. Look at arm_legacy_rev
9391 and thumb_legacy_rev for the form of RTL used then. */
9394 *cost
+= COSTS_N_INSNS (9);
9398 *cost
+= 6 * extra_cost
->alu
.shift
;
9399 *cost
+= 3 * extra_cost
->alu
.logical
;
9404 *cost
+= COSTS_N_INSNS (4);
9408 *cost
+= 2 * extra_cost
->alu
.shift
;
9409 *cost
+= extra_cost
->alu
.arith_shift
;
9410 *cost
+= 2 * extra_cost
->alu
.logical
;
9418 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9419 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9421 if (GET_CODE (XEXP (x
, 0)) == MULT
9422 || GET_CODE (XEXP (x
, 1)) == MULT
)
9424 rtx mul_op0
, mul_op1
, sub_op
;
9427 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9429 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9431 mul_op0
= XEXP (XEXP (x
, 0), 0);
9432 mul_op1
= XEXP (XEXP (x
, 0), 1);
9433 sub_op
= XEXP (x
, 1);
9437 mul_op0
= XEXP (XEXP (x
, 1), 0);
9438 mul_op1
= XEXP (XEXP (x
, 1), 1);
9439 sub_op
= XEXP (x
, 0);
9442 /* The first operand of the multiply may be optionally
9444 if (GET_CODE (mul_op0
) == NEG
)
9445 mul_op0
= XEXP (mul_op0
, 0);
9447 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9448 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9449 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9455 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9461 rtx shift_by_reg
= NULL
;
9465 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9466 if (shift_op
== NULL
)
9468 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9469 non_shift_op
= XEXP (x
, 0);
9472 non_shift_op
= XEXP (x
, 1);
9474 if (shift_op
!= NULL
)
9476 if (shift_by_reg
!= NULL
)
9479 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9480 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9483 *cost
+= extra_cost
->alu
.arith_shift
;
9485 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9486 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9491 && GET_CODE (XEXP (x
, 1)) == MULT
)
9495 *cost
+= extra_cost
->mult
[0].add
;
9496 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9497 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9498 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9502 if (CONST_INT_P (XEXP (x
, 0)))
9504 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9505 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9507 *cost
= COSTS_N_INSNS (insns
);
9509 *cost
+= insns
* extra_cost
->alu
.arith
;
9510 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9514 *cost
+= extra_cost
->alu
.arith
;
9519 if (GET_MODE_CLASS (mode
) == MODE_INT
9520 && GET_MODE_SIZE (mode
) < 4)
9522 rtx shift_op
, shift_reg
;
9525 /* We check both sides of the MINUS for shifter operands since,
9526 unlike PLUS, it's not commutative. */
9528 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9529 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9531 /* Slightly disparage, as we might need to widen the result. */
9534 *cost
+= extra_cost
->alu
.arith
;
9536 if (CONST_INT_P (XEXP (x
, 0)))
9538 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9547 *cost
+= COSTS_N_INSNS (1);
9549 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9551 rtx op1
= XEXP (x
, 1);
9554 *cost
+= 2 * extra_cost
->alu
.arith
;
9556 if (GET_CODE (op1
) == ZERO_EXTEND
)
9557 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9560 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9561 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9565 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9568 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9569 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9571 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9574 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9575 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9578 *cost
+= (extra_cost
->alu
.arith
9579 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9580 ? extra_cost
->alu
.arith
9581 : extra_cost
->alu
.arith_shift
));
9582 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9583 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9584 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9589 *cost
+= 2 * extra_cost
->alu
.arith
;
9595 *cost
= LIBCALL_COST (2);
9599 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9600 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9602 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9604 rtx mul_op0
, mul_op1
, add_op
;
9607 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9609 mul_op0
= XEXP (XEXP (x
, 0), 0);
9610 mul_op1
= XEXP (XEXP (x
, 0), 1);
9611 add_op
= XEXP (x
, 1);
9613 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9614 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9615 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9621 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9624 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9626 *cost
= LIBCALL_COST (2);
9630 /* Narrow modes can be synthesized in SImode, but the range
9631 of useful sub-operations is limited. Check for shift operations
9632 on one of the operands. Only left shifts can be used in the
9634 if (GET_MODE_CLASS (mode
) == MODE_INT
9635 && GET_MODE_SIZE (mode
) < 4)
9637 rtx shift_op
, shift_reg
;
9640 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9642 if (CONST_INT_P (XEXP (x
, 1)))
9644 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9645 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9647 *cost
= COSTS_N_INSNS (insns
);
9649 *cost
+= insns
* extra_cost
->alu
.arith
;
9650 /* Slightly penalize a narrow operation as the result may
9652 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9656 /* Slightly penalize a narrow operation as the result may
9660 *cost
+= extra_cost
->alu
.arith
;
9667 rtx shift_op
, shift_reg
;
9670 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9671 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9673 /* UXTA[BH] or SXTA[BH]. */
9675 *cost
+= extra_cost
->alu
.extend_arith
;
9676 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9678 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9683 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9684 if (shift_op
!= NULL
)
9689 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9690 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9693 *cost
+= extra_cost
->alu
.arith_shift
;
9695 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9696 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9699 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9701 rtx mul_op
= XEXP (x
, 0);
9703 if (TARGET_DSP_MULTIPLY
9704 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9705 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9706 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9707 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9708 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9709 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9710 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9711 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9712 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9713 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9714 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9715 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9720 *cost
+= extra_cost
->mult
[0].extend_add
;
9721 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9722 SIGN_EXTEND
, 0, speed_p
)
9723 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9724 SIGN_EXTEND
, 0, speed_p
)
9725 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9730 *cost
+= extra_cost
->mult
[0].add
;
9731 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9732 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9733 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9736 if (CONST_INT_P (XEXP (x
, 1)))
9738 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9739 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9741 *cost
= COSTS_N_INSNS (insns
);
9743 *cost
+= insns
* extra_cost
->alu
.arith
;
9744 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9748 *cost
+= extra_cost
->alu
.arith
;
9756 && GET_CODE (XEXP (x
, 0)) == MULT
9757 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9758 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9759 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9760 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9763 *cost
+= extra_cost
->mult
[1].extend_add
;
9764 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
9765 ZERO_EXTEND
, 0, speed_p
)
9766 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
9767 ZERO_EXTEND
, 0, speed_p
)
9768 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9772 *cost
+= COSTS_N_INSNS (1);
9774 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9775 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9778 *cost
+= (extra_cost
->alu
.arith
9779 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9780 ? extra_cost
->alu
.arith
9781 : extra_cost
->alu
.arith_shift
));
9783 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9785 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9790 *cost
+= 2 * extra_cost
->alu
.arith
;
9795 *cost
= LIBCALL_COST (2);
9798 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9801 *cost
+= extra_cost
->alu
.rev
;
9809 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9810 rtx op0
= XEXP (x
, 0);
9811 rtx shift_op
, shift_reg
;
9815 || (code
== IOR
&& TARGET_THUMB2
)))
9816 op0
= XEXP (op0
, 0);
9819 shift_op
= shifter_op_p (op0
, &shift_reg
);
9820 if (shift_op
!= NULL
)
9825 *cost
+= extra_cost
->alu
.log_shift_reg
;
9826 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9829 *cost
+= extra_cost
->alu
.log_shift
;
9831 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9832 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9836 if (CONST_INT_P (XEXP (x
, 1)))
9838 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9839 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9842 *cost
= COSTS_N_INSNS (insns
);
9844 *cost
+= insns
* extra_cost
->alu
.logical
;
9845 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
9850 *cost
+= extra_cost
->alu
.logical
;
9851 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
9852 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9858 rtx op0
= XEXP (x
, 0);
9859 enum rtx_code subcode
= GET_CODE (op0
);
9861 *cost
+= COSTS_N_INSNS (1);
9865 || (code
== IOR
&& TARGET_THUMB2
)))
9866 op0
= XEXP (op0
, 0);
9868 if (GET_CODE (op0
) == ZERO_EXTEND
)
9871 *cost
+= 2 * extra_cost
->alu
.logical
;
9873 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
9875 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9878 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9881 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9883 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
9885 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9890 *cost
+= 2 * extra_cost
->alu
.logical
;
9896 *cost
= LIBCALL_COST (2);
9900 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9901 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9903 rtx op0
= XEXP (x
, 0);
9905 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
9906 op0
= XEXP (op0
, 0);
9909 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9911 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
9912 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
9915 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9917 *cost
= LIBCALL_COST (2);
9923 if (TARGET_DSP_MULTIPLY
9924 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9925 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9926 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9927 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9928 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9929 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
9930 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
9931 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
9932 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9933 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9934 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9935 && (INTVAL (XEXP (XEXP (x
, 1), 1))
9940 *cost
+= extra_cost
->mult
[0].extend
;
9941 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
9942 SIGN_EXTEND
, 0, speed_p
);
9943 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
9944 SIGN_EXTEND
, 1, speed_p
);
9948 *cost
+= extra_cost
->mult
[0].simple
;
9955 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9956 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
9957 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9958 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
9961 *cost
+= extra_cost
->mult
[1].extend
;
9962 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
9963 ZERO_EXTEND
, 0, speed_p
)
9964 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9965 ZERO_EXTEND
, 0, speed_p
));
9969 *cost
= LIBCALL_COST (2);
9974 *cost
= LIBCALL_COST (2);
9978 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9979 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9981 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9984 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
9989 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9993 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9995 *cost
= LIBCALL_COST (1);
10001 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10003 *cost
+= COSTS_N_INSNS (1);
10004 /* Assume the non-flag-changing variant. */
10006 *cost
+= (extra_cost
->alu
.log_shift
10007 + extra_cost
->alu
.arith_shift
);
10008 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10012 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10013 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10015 *cost
+= COSTS_N_INSNS (1);
10016 /* No extra cost for MOV imm and MVN imm. */
10017 /* If the comparison op is using the flags, there's no further
10018 cost, otherwise we need to add the cost of the comparison. */
10019 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10020 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10021 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10023 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10024 *cost
+= (COSTS_N_INSNS (1)
10025 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10027 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10030 *cost
+= extra_cost
->alu
.arith
;
10036 *cost
+= extra_cost
->alu
.arith
;
10040 if (GET_MODE_CLASS (mode
) == MODE_INT
10041 && GET_MODE_SIZE (mode
) < 4)
10043 /* Slightly disparage, as we might need an extend operation. */
10046 *cost
+= extra_cost
->alu
.arith
;
10050 if (mode
== DImode
)
10052 *cost
+= COSTS_N_INSNS (1);
10054 *cost
+= 2 * extra_cost
->alu
.arith
;
10059 *cost
= LIBCALL_COST (1);
10063 if (mode
== SImode
)
10066 rtx shift_reg
= NULL
;
10068 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10072 if (shift_reg
!= NULL
)
10075 *cost
+= extra_cost
->alu
.log_shift_reg
;
10076 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10079 *cost
+= extra_cost
->alu
.log_shift
;
10080 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10085 *cost
+= extra_cost
->alu
.logical
;
10088 if (mode
== DImode
)
10090 *cost
+= COSTS_N_INSNS (1);
10096 *cost
+= LIBCALL_COST (1);
10101 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10103 *cost
+= COSTS_N_INSNS (3);
10106 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10107 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10109 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10110 /* Assume that if one arm of the if_then_else is a register,
10111 that it will be tied with the result and eliminate the
10112 conditional insn. */
10113 if (REG_P (XEXP (x
, 1)))
10115 else if (REG_P (XEXP (x
, 2)))
10121 if (extra_cost
->alu
.non_exec_costs_exec
)
10122 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10124 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10127 *cost
+= op1cost
+ op2cost
;
10133 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10137 machine_mode op0mode
;
10138 /* We'll mostly assume that the cost of a compare is the cost of the
10139 LHS. However, there are some notable exceptions. */
10141 /* Floating point compares are never done as side-effects. */
10142 op0mode
= GET_MODE (XEXP (x
, 0));
10143 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10144 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10147 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10149 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10151 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10157 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10159 *cost
= LIBCALL_COST (2);
10163 /* DImode compares normally take two insns. */
10164 if (op0mode
== DImode
)
10166 *cost
+= COSTS_N_INSNS (1);
10168 *cost
+= 2 * extra_cost
->alu
.arith
;
10172 if (op0mode
== SImode
)
10177 if (XEXP (x
, 1) == const0_rtx
10178 && !(REG_P (XEXP (x
, 0))
10179 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10180 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10182 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10184 /* Multiply operations that set the flags are often
10185 significantly more expensive. */
10187 && GET_CODE (XEXP (x
, 0)) == MULT
10188 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10189 *cost
+= extra_cost
->mult
[0].flag_setting
;
10192 && GET_CODE (XEXP (x
, 0)) == PLUS
10193 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10194 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10196 *cost
+= extra_cost
->mult
[0].flag_setting
;
10201 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10202 if (shift_op
!= NULL
)
10204 if (shift_reg
!= NULL
)
10206 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10209 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10212 *cost
+= extra_cost
->alu
.arith_shift
;
10213 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10214 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10219 *cost
+= extra_cost
->alu
.arith
;
10220 if (CONST_INT_P (XEXP (x
, 1))
10221 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10223 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10231 *cost
= LIBCALL_COST (2);
10254 if (outer_code
== SET
)
10256 /* Is it a store-flag operation? */
10257 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10258 && XEXP (x
, 1) == const0_rtx
)
10260 /* Thumb also needs an IT insn. */
10261 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10264 if (XEXP (x
, 1) == const0_rtx
)
10269 /* LSR Rd, Rn, #31. */
10271 *cost
+= extra_cost
->alu
.shift
;
10281 *cost
+= COSTS_N_INSNS (1);
10285 /* RSBS T1, Rn, Rn, LSR #31
10287 *cost
+= COSTS_N_INSNS (1);
10289 *cost
+= extra_cost
->alu
.arith_shift
;
10293 /* RSB Rd, Rn, Rn, ASR #1
10294 LSR Rd, Rd, #31. */
10295 *cost
+= COSTS_N_INSNS (1);
10297 *cost
+= (extra_cost
->alu
.arith_shift
10298 + extra_cost
->alu
.shift
);
10304 *cost
+= COSTS_N_INSNS (1);
10306 *cost
+= extra_cost
->alu
.shift
;
10310 /* Remaining cases are either meaningless or would take
10311 three insns anyway. */
10312 *cost
= COSTS_N_INSNS (3);
10315 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10320 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10321 if (CONST_INT_P (XEXP (x
, 1))
10322 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10324 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10331 /* Not directly inside a set. If it involves the condition code
10332 register it must be the condition for a branch, cond_exec or
10333 I_T_E operation. Since the comparison is performed elsewhere
10334 this is just the control part which has no additional
10336 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10337 && XEXP (x
, 1) == const0_rtx
)
10345 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10346 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10349 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10353 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10355 *cost
= LIBCALL_COST (1);
10359 if (mode
== SImode
)
10362 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10366 *cost
= LIBCALL_COST (1);
10370 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10371 && MEM_P (XEXP (x
, 0)))
10373 if (mode
== DImode
)
10374 *cost
+= COSTS_N_INSNS (1);
10379 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10380 *cost
+= extra_cost
->ldst
.load
;
10382 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10384 if (mode
== DImode
)
10385 *cost
+= extra_cost
->alu
.shift
;
10390 /* Widening from less than 32-bits requires an extend operation. */
10391 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10393 /* We have SXTB/SXTH. */
10394 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10396 *cost
+= extra_cost
->alu
.extend
;
10398 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10400 /* Needs two shifts. */
10401 *cost
+= COSTS_N_INSNS (1);
10402 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10404 *cost
+= 2 * extra_cost
->alu
.shift
;
10407 /* Widening beyond 32-bits requires one more insn. */
10408 if (mode
== DImode
)
10410 *cost
+= COSTS_N_INSNS (1);
10412 *cost
+= extra_cost
->alu
.shift
;
10419 || GET_MODE (XEXP (x
, 0)) == SImode
10420 || GET_MODE (XEXP (x
, 0)) == QImode
)
10421 && MEM_P (XEXP (x
, 0)))
10423 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10425 if (mode
== DImode
)
10426 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10431 /* Widening from less than 32-bits requires an extend operation. */
10432 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10434 /* UXTB can be a shorter instruction in Thumb2, but it might
10435 be slower than the AND Rd, Rn, #255 alternative. When
10436 optimizing for speed it should never be slower to use
10437 AND, and we don't really model 16-bit vs 32-bit insns
10440 *cost
+= extra_cost
->alu
.logical
;
10442 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10444 /* We have UXTB/UXTH. */
10445 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10447 *cost
+= extra_cost
->alu
.extend
;
10449 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10451 /* Needs two shifts. It's marginally preferable to use
10452 shifts rather than two BIC instructions as the second
10453 shift may merge with a subsequent insn as a shifter
10455 *cost
= COSTS_N_INSNS (2);
10456 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10458 *cost
+= 2 * extra_cost
->alu
.shift
;
10461 /* Widening beyond 32-bits requires one more insn. */
10462 if (mode
== DImode
)
10464 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10471 /* CONST_INT has no mode, so we cannot tell for sure how many
10472 insns are really going to be needed. The best we can do is
10473 look at the value passed. If it fits in SImode, then assume
10474 that's the mode it will be used for. Otherwise assume it
10475 will be used in DImode. */
10476 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10481 /* Avoid blowing up in arm_gen_constant (). */
10482 if (!(outer_code
== PLUS
10483 || outer_code
== AND
10484 || outer_code
== IOR
10485 || outer_code
== XOR
10486 || outer_code
== MINUS
))
10490 if (mode
== SImode
)
10492 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10493 INTVAL (x
), NULL
, NULL
,
10499 *cost
+= COSTS_N_INSNS (arm_gen_constant
10500 (outer_code
, SImode
, NULL
,
10501 trunc_int_for_mode (INTVAL (x
), SImode
),
10503 + arm_gen_constant (outer_code
, SImode
, NULL
,
10504 INTVAL (x
) >> 32, NULL
,
10516 if (arm_arch_thumb2
&& !flag_pic
)
10517 *cost
+= COSTS_N_INSNS (1);
10519 *cost
+= extra_cost
->ldst
.load
;
10522 *cost
+= COSTS_N_INSNS (1);
10526 *cost
+= COSTS_N_INSNS (1);
10528 *cost
+= extra_cost
->alu
.arith
;
10534 *cost
= COSTS_N_INSNS (4);
10539 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10540 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10542 if (vfp3_const_double_rtx (x
))
10545 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10551 if (mode
== DFmode
)
10552 *cost
+= extra_cost
->ldst
.loadd
;
10554 *cost
+= extra_cost
->ldst
.loadf
;
10557 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10561 *cost
= COSTS_N_INSNS (4);
10567 && TARGET_HARD_FLOAT
10568 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10569 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10570 *cost
= COSTS_N_INSNS (1);
10572 *cost
= COSTS_N_INSNS (4);
10577 /* When optimizing for size, we prefer constant pool entries to
10578 MOVW/MOVT pairs, so bump the cost of these slightly. */
10585 *cost
+= extra_cost
->alu
.clz
;
10589 if (XEXP (x
, 1) == const0_rtx
)
10592 *cost
+= extra_cost
->alu
.log_shift
;
10593 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10596 /* Fall through. */
10600 *cost
+= COSTS_N_INSNS (1);
10604 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10605 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10606 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10607 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10608 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10609 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10610 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10611 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10615 *cost
+= extra_cost
->mult
[1].extend
;
10616 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10617 ZERO_EXTEND
, 0, speed_p
)
10618 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10619 ZERO_EXTEND
, 0, speed_p
));
10622 *cost
= LIBCALL_COST (1);
10625 case UNSPEC_VOLATILE
:
10627 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10630 /* Reading the PC is like reading any other register. Writing it
10631 is more expensive, but we take that into account elsewhere. */
10636 /* TODO: Simple zero_extract of bottom bits using AND. */
10637 /* Fall through. */
10641 && CONST_INT_P (XEXP (x
, 1))
10642 && CONST_INT_P (XEXP (x
, 2)))
10645 *cost
+= extra_cost
->alu
.bfx
;
10646 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10649 /* Without UBFX/SBFX, need to resort to shift operations. */
10650 *cost
+= COSTS_N_INSNS (1);
10652 *cost
+= 2 * extra_cost
->alu
.shift
;
10653 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10657 if (TARGET_HARD_FLOAT
)
10660 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10661 if (!TARGET_FPU_ARMV8
10662 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10664 /* Pre v8, widening HF->DF is a two-step process, first
10665 widening to SFmode. */
10666 *cost
+= COSTS_N_INSNS (1);
10668 *cost
+= extra_cost
->fp
[0].widen
;
10670 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10674 *cost
= LIBCALL_COST (1);
10677 case FLOAT_TRUNCATE
:
10678 if (TARGET_HARD_FLOAT
)
10681 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10682 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10684 /* Vector modes? */
10686 *cost
= LIBCALL_COST (1);
10690 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10692 rtx op0
= XEXP (x
, 0);
10693 rtx op1
= XEXP (x
, 1);
10694 rtx op2
= XEXP (x
, 2);
10697 /* vfms or vfnma. */
10698 if (GET_CODE (op0
) == NEG
)
10699 op0
= XEXP (op0
, 0);
10701 /* vfnms or vfnma. */
10702 if (GET_CODE (op2
) == NEG
)
10703 op2
= XEXP (op2
, 0);
10705 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
10706 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
10707 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
10710 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10715 *cost
= LIBCALL_COST (3);
10720 if (TARGET_HARD_FLOAT
)
10722 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10723 a vcvt fixed-point conversion. */
10724 if (code
== FIX
&& mode
== SImode
10725 && GET_CODE (XEXP (x
, 0)) == FIX
10726 && GET_MODE (XEXP (x
, 0)) == SFmode
10727 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10728 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10732 *cost
+= extra_cost
->fp
[0].toint
;
10734 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10739 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10741 mode
= GET_MODE (XEXP (x
, 0));
10743 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
10744 /* Strip of the 'cost' of rounding towards zero. */
10745 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10746 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
10749 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10750 /* ??? Increase the cost to deal with transferring from
10751 FP -> CORE registers? */
10754 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10755 && TARGET_FPU_ARMV8
)
10758 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10761 /* Vector costs? */
10763 *cost
= LIBCALL_COST (1);
10767 case UNSIGNED_FLOAT
:
10768 if (TARGET_HARD_FLOAT
)
10770 /* ??? Increase the cost to deal with transferring from CORE
10771 -> FP registers? */
10773 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10776 *cost
= LIBCALL_COST (1);
10784 /* Just a guess. Guess number of instructions in the asm
10785 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10786 though (see PR60663). */
10787 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10788 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10790 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10794 if (mode
!= VOIDmode
)
10795 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10797 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10802 #undef HANDLE_NARROW_SHIFT_ARITH
10804 /* RTX costs entry point. */
10807 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
10808 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
10811 int code
= GET_CODE (x
);
10812 gcc_assert (current_tune
->insn_extra_cost
);
10814 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
10815 (enum rtx_code
) outer_code
,
10816 current_tune
->insn_extra_cost
,
10819 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10821 print_rtl_single (dump_file
, x
);
10822 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10823 *total
, result
? "final" : "partial");
10828 /* All address computations that can be done are free, but rtx cost returns
10829 the same for practically all of them. So we weight the different types
10830 of address here in the order (most pref first):
10831 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10833 arm_arm_address_cost (rtx x
)
10835 enum rtx_code c
= GET_CODE (x
);
10837 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
10839 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
10844 if (CONST_INT_P (XEXP (x
, 1)))
10847 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
10857 arm_thumb_address_cost (rtx x
)
10859 enum rtx_code c
= GET_CODE (x
);
10864 && REG_P (XEXP (x
, 0))
10865 && CONST_INT_P (XEXP (x
, 1)))
10872 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
10873 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
10875 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
10878 /* Adjust cost hook for XScale. */
10880 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10883 /* Some true dependencies can have a higher cost depending
10884 on precisely how certain input operands are used. */
10886 && recog_memoized (insn
) >= 0
10887 && recog_memoized (dep
) >= 0)
10889 int shift_opnum
= get_attr_shift (insn
);
10890 enum attr_type attr_type
= get_attr_type (dep
);
10892 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10893 operand for INSN. If we have a shifted input operand and the
10894 instruction we depend on is another ALU instruction, then we may
10895 have to account for an additional stall. */
10896 if (shift_opnum
!= 0
10897 && (attr_type
== TYPE_ALU_SHIFT_IMM
10898 || attr_type
== TYPE_ALUS_SHIFT_IMM
10899 || attr_type
== TYPE_LOGIC_SHIFT_IMM
10900 || attr_type
== TYPE_LOGICS_SHIFT_IMM
10901 || attr_type
== TYPE_ALU_SHIFT_REG
10902 || attr_type
== TYPE_ALUS_SHIFT_REG
10903 || attr_type
== TYPE_LOGIC_SHIFT_REG
10904 || attr_type
== TYPE_LOGICS_SHIFT_REG
10905 || attr_type
== TYPE_MOV_SHIFT
10906 || attr_type
== TYPE_MVN_SHIFT
10907 || attr_type
== TYPE_MOV_SHIFT_REG
10908 || attr_type
== TYPE_MVN_SHIFT_REG
))
10910 rtx shifted_operand
;
10913 /* Get the shifted operand. */
10914 extract_insn (insn
);
10915 shifted_operand
= recog_data
.operand
[shift_opnum
];
10917 /* Iterate over all the operands in DEP. If we write an operand
10918 that overlaps with SHIFTED_OPERAND, then we have increase the
10919 cost of this dependency. */
10920 extract_insn (dep
);
10921 preprocess_constraints (dep
);
10922 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
10924 /* We can ignore strict inputs. */
10925 if (recog_data
.operand_type
[opno
] == OP_IN
)
10928 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
10940 /* Adjust cost hook for Cortex A9. */
10942 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10952 case REG_DEP_OUTPUT
:
10953 if (recog_memoized (insn
) >= 0
10954 && recog_memoized (dep
) >= 0)
10956 if (GET_CODE (PATTERN (insn
)) == SET
)
10959 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
10961 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
10963 enum attr_type attr_type_insn
= get_attr_type (insn
);
10964 enum attr_type attr_type_dep
= get_attr_type (dep
);
10966 /* By default all dependencies of the form
10969 have an extra latency of 1 cycle because
10970 of the input and output dependency in this
10971 case. However this gets modeled as an true
10972 dependency and hence all these checks. */
10973 if (REG_P (SET_DEST (PATTERN (insn
)))
10974 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
10976 /* FMACS is a special case where the dependent
10977 instruction can be issued 3 cycles before
10978 the normal latency in case of an output
10980 if ((attr_type_insn
== TYPE_FMACS
10981 || attr_type_insn
== TYPE_FMACD
)
10982 && (attr_type_dep
== TYPE_FMACS
10983 || attr_type_dep
== TYPE_FMACD
))
10985 if (dep_type
== REG_DEP_OUTPUT
)
10986 *cost
= insn_default_latency (dep
) - 3;
10988 *cost
= insn_default_latency (dep
);
10993 if (dep_type
== REG_DEP_OUTPUT
)
10994 *cost
= insn_default_latency (dep
) + 1;
10996 *cost
= insn_default_latency (dep
);
11006 gcc_unreachable ();
11012 /* Adjust cost hook for FA726TE. */
11014 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11017 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11018 have penalty of 3. */
11019 if (dep_type
== REG_DEP_TRUE
11020 && recog_memoized (insn
) >= 0
11021 && recog_memoized (dep
) >= 0
11022 && get_attr_conds (dep
) == CONDS_SET
)
11024 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11025 if (get_attr_conds (insn
) == CONDS_USE
11026 && get_attr_type (insn
) != TYPE_BRANCH
)
11032 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11033 || get_attr_conds (insn
) == CONDS_USE
)
11043 /* Implement TARGET_REGISTER_MOVE_COST.
11045 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11046 it is typically more expensive than a single memory access. We set
11047 the cost to less than two memory accesses so that floating
11048 point to integer conversion does not go through memory. */
11051 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11052 reg_class_t from
, reg_class_t to
)
11056 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11057 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11059 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11060 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11062 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11069 if (from
== HI_REGS
|| to
== HI_REGS
)
11076 /* Implement TARGET_MEMORY_MOVE_COST. */
11079 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11080 bool in ATTRIBUTE_UNUSED
)
11086 if (GET_MODE_SIZE (mode
) < 4)
11089 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11093 /* Vectorizer cost model implementation. */
11095 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11097 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11099 int misalign ATTRIBUTE_UNUSED
)
11103 switch (type_of_cost
)
11106 return current_tune
->vec_costs
->scalar_stmt_cost
;
11109 return current_tune
->vec_costs
->scalar_load_cost
;
11112 return current_tune
->vec_costs
->scalar_store_cost
;
11115 return current_tune
->vec_costs
->vec_stmt_cost
;
11118 return current_tune
->vec_costs
->vec_align_load_cost
;
11121 return current_tune
->vec_costs
->vec_store_cost
;
11123 case vec_to_scalar
:
11124 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11126 case scalar_to_vec
:
11127 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11129 case unaligned_load
:
11130 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11132 case unaligned_store
:
11133 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11135 case cond_branch_taken
:
11136 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11138 case cond_branch_not_taken
:
11139 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11142 case vec_promote_demote
:
11143 return current_tune
->vec_costs
->vec_stmt_cost
;
11145 case vec_construct
:
11146 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11147 return elements
/ 2 + 1;
11150 gcc_unreachable ();
11154 /* Implement targetm.vectorize.add_stmt_cost. */
11157 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11158 struct _stmt_vec_info
*stmt_info
, int misalign
,
11159 enum vect_cost_model_location where
)
11161 unsigned *cost
= (unsigned *) data
;
11162 unsigned retval
= 0;
11164 if (flag_vect_cost_model
)
11166 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11167 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11169 /* Statements in an inner loop relative to the loop being
11170 vectorized are weighted more heavily. The value here is
11171 arbitrary and could potentially be improved with analysis. */
11172 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11173 count
*= 50; /* FIXME. */
11175 retval
= (unsigned) (count
* stmt_cost
);
11176 cost
[where
] += retval
;
11182 /* Return true if and only if this insn can dual-issue only as older. */
11184 cortexa7_older_only (rtx_insn
*insn
)
11186 if (recog_memoized (insn
) < 0)
11189 switch (get_attr_type (insn
))
11191 case TYPE_ALU_DSP_REG
:
11192 case TYPE_ALU_SREG
:
11193 case TYPE_ALUS_SREG
:
11194 case TYPE_LOGIC_REG
:
11195 case TYPE_LOGICS_REG
:
11197 case TYPE_ADCS_REG
:
11202 case TYPE_SHIFT_IMM
:
11203 case TYPE_SHIFT_REG
:
11204 case TYPE_LOAD_BYTE
:
11207 case TYPE_FFARITHS
:
11209 case TYPE_FFARITHD
:
11227 case TYPE_F_STORES
:
11234 /* Return true if and only if this insn can dual-issue as younger. */
11236 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11238 if (recog_memoized (insn
) < 0)
11241 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11245 switch (get_attr_type (insn
))
11248 case TYPE_ALUS_IMM
:
11249 case TYPE_LOGIC_IMM
:
11250 case TYPE_LOGICS_IMM
:
11255 case TYPE_MOV_SHIFT
:
11256 case TYPE_MOV_SHIFT_REG
:
11266 /* Look for an instruction that can dual issue only as an older
11267 instruction, and move it in front of any instructions that can
11268 dual-issue as younger, while preserving the relative order of all
11269 other instructions in the ready list. This is a hueuristic to help
11270 dual-issue in later cycles, by postponing issue of more flexible
11271 instructions. This heuristic may affect dual issue opportunities
11272 in the current cycle. */
11274 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11275 int *n_readyp
, int clock
)
11278 int first_older_only
= -1, first_younger
= -1;
11282 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11286 /* Traverse the ready list from the head (the instruction to issue
11287 first), and looking for the first instruction that can issue as
11288 younger and the first instruction that can dual-issue only as
11290 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11292 rtx_insn
*insn
= ready
[i
];
11293 if (cortexa7_older_only (insn
))
11295 first_older_only
= i
;
11297 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11300 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11304 /* Nothing to reorder because either no younger insn found or insn
11305 that can dual-issue only as older appears before any insn that
11306 can dual-issue as younger. */
11307 if (first_younger
== -1)
11310 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11314 /* Nothing to reorder because no older-only insn in the ready list. */
11315 if (first_older_only
== -1)
11318 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11322 /* Move first_older_only insn before first_younger. */
11324 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11325 INSN_UID(ready
[first_older_only
]),
11326 INSN_UID(ready
[first_younger
]));
11327 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11328 for (i
= first_older_only
; i
< first_younger
; i
++)
11330 ready
[i
] = ready
[i
+1];
11333 ready
[i
] = first_older_only_insn
;
11337 /* Implement TARGET_SCHED_REORDER. */
11339 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11344 case TARGET_CPU_cortexa7
:
11345 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11348 /* Do nothing for other cores. */
11352 return arm_issue_rate ();
11355 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11356 It corrects the value of COST based on the relationship between
11357 INSN and DEP through the dependence LINK. It returns the new
11358 value. There is a per-core adjust_cost hook to adjust scheduler costs
11359 and the per-core hook can choose to completely override the generic
11360 adjust_cost function. Only put bits of code into arm_adjust_cost that
11361 are common across all cores. */
11363 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
11368 /* When generating Thumb-1 code, we want to place flag-setting operations
11369 close to a conditional branch which depends on them, so that we can
11370 omit the comparison. */
11373 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11374 && recog_memoized (dep
) >= 0
11375 && get_attr_conds (dep
) == CONDS_SET
)
11378 if (current_tune
->sched_adjust_cost
!= NULL
)
11380 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
11384 /* XXX Is this strictly true? */
11385 if (dep_type
== REG_DEP_ANTI
11386 || dep_type
== REG_DEP_OUTPUT
)
11389 /* Call insns don't incur a stall, even if they follow a load. */
11394 if ((i_pat
= single_set (insn
)) != NULL
11395 && MEM_P (SET_SRC (i_pat
))
11396 && (d_pat
= single_set (dep
)) != NULL
11397 && MEM_P (SET_DEST (d_pat
)))
11399 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11400 /* This is a load after a store, there is no conflict if the load reads
11401 from a cached area. Assume that loads from the stack, and from the
11402 constant pool are cached, and that others will miss. This is a
11405 if ((GET_CODE (src_mem
) == SYMBOL_REF
11406 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11407 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11408 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11409 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11417 arm_max_conditional_execute (void)
11419 return max_insns_skipped
;
11423 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11426 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11428 return (optimize
> 0) ? 2 : 0;
11432 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11434 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11437 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11438 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11439 sequences of non-executed instructions in IT blocks probably take the same
11440 amount of time as executed instructions (and the IT instruction itself takes
11441 space in icache). This function was experimentally determined to give good
11442 results on a popular embedded benchmark. */
11445 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11447 return (TARGET_32BIT
&& speed_p
) ? 1
11448 : arm_default_branch_cost (speed_p
, predictable_p
);
11452 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
11454 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11457 static bool fp_consts_inited
= false;
11459 static REAL_VALUE_TYPE value_fp0
;
11462 init_fp_table (void)
11466 r
= REAL_VALUE_ATOF ("0", DFmode
);
11468 fp_consts_inited
= true;
11471 /* Return TRUE if rtx X is a valid immediate FP constant. */
11473 arm_const_double_rtx (rtx x
)
11475 const REAL_VALUE_TYPE
*r
;
11477 if (!fp_consts_inited
)
11480 r
= CONST_DOUBLE_REAL_VALUE (x
);
11481 if (REAL_VALUE_MINUS_ZERO (*r
))
11484 if (real_equal (r
, &value_fp0
))
11490 /* VFPv3 has a fairly wide range of representable immediates, formed from
11491 "quarter-precision" floating-point values. These can be evaluated using this
11492 formula (with ^ for exponentiation):
11496 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11497 16 <= n <= 31 and 0 <= r <= 7.
11499 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11501 - A (most-significant) is the sign bit.
11502 - BCD are the exponent (encoded as r XOR 3).
11503 - EFGH are the mantissa (encoded as n - 16).
11506 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11507 fconst[sd] instruction, or -1 if X isn't suitable. */
11509 vfp3_const_double_index (rtx x
)
11511 REAL_VALUE_TYPE r
, m
;
11512 int sign
, exponent
;
11513 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11514 unsigned HOST_WIDE_INT mask
;
11515 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11518 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11521 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11523 /* We can't represent these things, so detect them first. */
11524 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11527 /* Extract sign, exponent and mantissa. */
11528 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11529 r
= real_value_abs (&r
);
11530 exponent
= REAL_EXP (&r
);
11531 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11532 highest (sign) bit, with a fixed binary point at bit point_pos.
11533 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11534 bits for the mantissa, this may fail (low bits would be lost). */
11535 real_ldexp (&m
, &r
, point_pos
- exponent
);
11536 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11537 mantissa
= w
.elt (0);
11538 mant_hi
= w
.elt (1);
11540 /* If there are bits set in the low part of the mantissa, we can't
11541 represent this value. */
11545 /* Now make it so that mantissa contains the most-significant bits, and move
11546 the point_pos to indicate that the least-significant bits have been
11548 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11549 mantissa
= mant_hi
;
11551 /* We can permit four significant bits of mantissa only, plus a high bit
11552 which is always 1. */
11553 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11554 if ((mantissa
& mask
) != 0)
11557 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11558 mantissa
>>= point_pos
- 5;
11560 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11561 floating-point immediate zero with Neon using an integer-zero load, but
11562 that case is handled elsewhere.) */
11566 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11568 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11569 normalized significands are in the range [1, 2). (Our mantissa is shifted
11570 left 4 places at this point relative to normalized IEEE754 values). GCC
11571 internally uses [0.5, 1) (see real.c), so the exponent returned from
11572 REAL_EXP must be altered. */
11573 exponent
= 5 - exponent
;
11575 if (exponent
< 0 || exponent
> 7)
11578 /* Sign, mantissa and exponent are now in the correct form to plug into the
11579 formula described in the comment above. */
11580 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11583 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11585 vfp3_const_double_rtx (rtx x
)
11590 return vfp3_const_double_index (x
) != -1;
11593 /* Recognize immediates which can be used in various Neon instructions. Legal
11594 immediates are described by the following table (for VMVN variants, the
11595 bitwise inverse of the constant shown is recognized. In either case, VMOV
11596 is output and the correct instruction to use for a given constant is chosen
11597 by the assembler). The constant shown is replicated across all elements of
11598 the destination vector.
11600 insn elems variant constant (binary)
11601 ---- ----- ------- -----------------
11602 vmov i32 0 00000000 00000000 00000000 abcdefgh
11603 vmov i32 1 00000000 00000000 abcdefgh 00000000
11604 vmov i32 2 00000000 abcdefgh 00000000 00000000
11605 vmov i32 3 abcdefgh 00000000 00000000 00000000
11606 vmov i16 4 00000000 abcdefgh
11607 vmov i16 5 abcdefgh 00000000
11608 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11609 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11610 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11611 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11612 vmvn i16 10 00000000 abcdefgh
11613 vmvn i16 11 abcdefgh 00000000
11614 vmov i32 12 00000000 00000000 abcdefgh 11111111
11615 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11616 vmov i32 14 00000000 abcdefgh 11111111 11111111
11617 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11618 vmov i8 16 abcdefgh
11619 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11620 eeeeeeee ffffffff gggggggg hhhhhhhh
11621 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11622 vmov f32 19 00000000 00000000 00000000 00000000
11624 For case 18, B = !b. Representable values are exactly those accepted by
11625 vfp3_const_double_index, but are output as floating-point numbers rather
11628 For case 19, we will change it to vmov.i32 when assembling.
11630 Variants 0-5 (inclusive) may also be used as immediates for the second
11631 operand of VORR/VBIC instructions.
11633 The INVERSE argument causes the bitwise inverse of the given operand to be
11634 recognized instead (used for recognizing legal immediates for the VAND/VORN
11635 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11636 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11637 output, rather than the real insns vbic/vorr).
11639 INVERSE makes no difference to the recognition of float vectors.
11641 The return value is the variant of immediate as shown in the above table, or
11642 -1 if the given value doesn't match any of the listed patterns.
11645 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11646 rtx
*modconst
, int *elementwidth
)
11648 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11650 for (i = 0; i < idx; i += (STRIDE)) \
11655 immtype = (CLASS); \
11656 elsize = (ELSIZE); \
11660 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11661 unsigned int innersize
;
11662 unsigned char bytes
[16];
11663 int immtype
= -1, matches
;
11664 unsigned int invmask
= inverse
? 0xff : 0;
11665 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11668 n_elts
= CONST_VECTOR_NUNITS (op
);
11672 if (mode
== VOIDmode
)
11676 innersize
= GET_MODE_UNIT_SIZE (mode
);
11678 /* Vectors of float constants. */
11679 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11681 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11683 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11686 /* FP16 vectors cannot be represented. */
11687 if (GET_MODE_INNER (mode
) == HFmode
)
11690 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11691 are distinct in this context. */
11692 if (!const_vec_duplicate_p (op
))
11696 *modconst
= CONST_VECTOR_ELT (op
, 0);
11701 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11707 /* The tricks done in the code below apply for little-endian vector layout.
11708 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11709 FIXME: Implement logic for big-endian vectors. */
11710 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
11713 /* Splat vector constant out into a byte vector. */
11714 for (i
= 0; i
< n_elts
; i
++)
11716 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11717 unsigned HOST_WIDE_INT elpart
;
11719 gcc_assert (CONST_INT_P (el
));
11720 elpart
= INTVAL (el
);
11722 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11724 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11725 elpart
>>= BITS_PER_UNIT
;
11729 /* Sanity check. */
11730 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11734 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11735 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11737 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11738 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11740 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11741 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11743 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11744 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11746 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11748 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11750 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11751 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11753 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11754 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11756 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11757 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11759 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11760 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11762 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11764 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11766 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11767 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11769 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11770 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11772 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11773 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11775 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11776 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11778 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11780 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11781 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11789 *elementwidth
= elsize
;
11793 unsigned HOST_WIDE_INT imm
= 0;
11795 /* Un-invert bytes of recognized vector, if necessary. */
11797 for (i
= 0; i
< idx
; i
++)
11798 bytes
[i
] ^= invmask
;
11802 /* FIXME: Broken on 32-bit H_W_I hosts. */
11803 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11805 for (i
= 0; i
< 8; i
++)
11806 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11807 << (i
* BITS_PER_UNIT
);
11809 *modconst
= GEN_INT (imm
);
11813 unsigned HOST_WIDE_INT imm
= 0;
11815 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11816 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11818 *modconst
= GEN_INT (imm
);
11826 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11827 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11828 float elements), and a modified constant (whatever should be output for a
11829 VMOV) in *MODCONST. */
11832 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
11833 rtx
*modconst
, int *elementwidth
)
11837 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
11843 *modconst
= tmpconst
;
11846 *elementwidth
= tmpwidth
;
11851 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11852 the immediate is valid, write a constant suitable for using as an operand
11853 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11854 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11857 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
11858 rtx
*modconst
, int *elementwidth
)
11862 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
11864 if (retval
< 0 || retval
> 5)
11868 *modconst
= tmpconst
;
11871 *elementwidth
= tmpwidth
;
11876 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11877 the immediate is valid, write a constant suitable for using as an operand
11878 to VSHR/VSHL to *MODCONST and the corresponding element width to
11879 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
11880 because they have different limitations. */
11883 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
11884 rtx
*modconst
, int *elementwidth
,
11887 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11888 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
11889 unsigned HOST_WIDE_INT last_elt
= 0;
11890 unsigned HOST_WIDE_INT maxshift
;
11892 /* Split vector constant out into a byte vector. */
11893 for (i
= 0; i
< n_elts
; i
++)
11895 rtx el
= CONST_VECTOR_ELT (op
, i
);
11896 unsigned HOST_WIDE_INT elpart
;
11898 if (CONST_INT_P (el
))
11899 elpart
= INTVAL (el
);
11900 else if (CONST_DOUBLE_P (el
))
11903 gcc_unreachable ();
11905 if (i
!= 0 && elpart
!= last_elt
)
11911 /* Shift less than element size. */
11912 maxshift
= innersize
* 8;
11916 /* Left shift immediate value can be from 0 to <size>-1. */
11917 if (last_elt
>= maxshift
)
11922 /* Right shift immediate value can be from 1 to <size>. */
11923 if (last_elt
== 0 || last_elt
> maxshift
)
11928 *elementwidth
= innersize
* 8;
11931 *modconst
= CONST_VECTOR_ELT (op
, 0);
11936 /* Return a string suitable for output of Neon immediate logic operation
11940 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
11941 int inverse
, int quad
)
11943 int width
, is_valid
;
11944 static char templ
[40];
11946 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
11948 gcc_assert (is_valid
!= 0);
11951 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
11953 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
11958 /* Return a string suitable for output of Neon immediate shift operation
11959 (VSHR or VSHL) MNEM. */
11962 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
11963 machine_mode mode
, int quad
,
11966 int width
, is_valid
;
11967 static char templ
[40];
11969 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
11970 gcc_assert (is_valid
!= 0);
11973 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
11975 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
11980 /* Output a sequence of pairwise operations to implement a reduction.
11981 NOTE: We do "too much work" here, because pairwise operations work on two
11982 registers-worth of operands in one go. Unfortunately we can't exploit those
11983 extra calculations to do the full operation in fewer steps, I don't think.
11984 Although all vector elements of the result but the first are ignored, we
11985 actually calculate the same result in each of the elements. An alternative
11986 such as initially loading a vector with zero to use as each of the second
11987 operands would use up an additional register and take an extra instruction,
11988 for no particular gain. */
11991 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
11992 rtx (*reduc
) (rtx
, rtx
, rtx
))
11994 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
11997 for (i
= parts
/ 2; i
>= 1; i
/= 2)
11999 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12000 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12005 /* If VALS is a vector constant that can be loaded into a register
12006 using VDUP, generate instructions to do so and return an RTX to
12007 assign to the register. Otherwise return NULL_RTX. */
12010 neon_vdup_constant (rtx vals
)
12012 machine_mode mode
= GET_MODE (vals
);
12013 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12016 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12019 if (!const_vec_duplicate_p (vals
, &x
))
12020 /* The elements are not all the same. We could handle repeating
12021 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12022 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12026 /* We can load this constant by using VDUP and a constant in a
12027 single ARM register. This will be cheaper than a vector
12030 x
= copy_to_mode_reg (inner_mode
, x
);
12031 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12034 /* Generate code to load VALS, which is a PARALLEL containing only
12035 constants (for vec_init) or CONST_VECTOR, efficiently into a
12036 register. Returns an RTX to copy into the register, or NULL_RTX
12037 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12040 neon_make_constant (rtx vals
)
12042 machine_mode mode
= GET_MODE (vals
);
12044 rtx const_vec
= NULL_RTX
;
12045 int n_elts
= GET_MODE_NUNITS (mode
);
12049 if (GET_CODE (vals
) == CONST_VECTOR
)
12051 else if (GET_CODE (vals
) == PARALLEL
)
12053 /* A CONST_VECTOR must contain only CONST_INTs and
12054 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12055 Only store valid constants in a CONST_VECTOR. */
12056 for (i
= 0; i
< n_elts
; ++i
)
12058 rtx x
= XVECEXP (vals
, 0, i
);
12059 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12062 if (n_const
== n_elts
)
12063 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12066 gcc_unreachable ();
12068 if (const_vec
!= NULL
12069 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12070 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12072 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12073 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12074 pipeline cycle; creating the constant takes one or two ARM
12075 pipeline cycles. */
12077 else if (const_vec
!= NULL_RTX
)
12078 /* Load from constant pool. On Cortex-A8 this takes two cycles
12079 (for either double or quad vectors). We can not take advantage
12080 of single-cycle VLD1 because we need a PC-relative addressing
12084 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12085 We can not construct an initializer. */
12089 /* Initialize vector TARGET to VALS. */
12092 neon_expand_vector_init (rtx target
, rtx vals
)
12094 machine_mode mode
= GET_MODE (target
);
12095 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12096 int n_elts
= GET_MODE_NUNITS (mode
);
12097 int n_var
= 0, one_var
= -1;
12098 bool all_same
= true;
12102 for (i
= 0; i
< n_elts
; ++i
)
12104 x
= XVECEXP (vals
, 0, i
);
12105 if (!CONSTANT_P (x
))
12106 ++n_var
, one_var
= i
;
12108 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12114 rtx constant
= neon_make_constant (vals
);
12115 if (constant
!= NULL_RTX
)
12117 emit_move_insn (target
, constant
);
12122 /* Splat a single non-constant element if we can. */
12123 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12125 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12126 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
12130 /* One field is non-constant. Load constant then overwrite varying
12131 field. This is more efficient than using the stack. */
12134 rtx copy
= copy_rtx (vals
);
12135 rtx index
= GEN_INT (one_var
);
12137 /* Load constant part of vector, substitute neighboring value for
12138 varying element. */
12139 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12140 neon_expand_vector_init (target
, copy
);
12142 /* Insert variable. */
12143 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12147 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12150 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12153 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12156 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12159 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12162 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12165 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12168 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12171 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12174 gcc_unreachable ();
12179 /* Construct the vector in memory one field at a time
12180 and load the whole vector. */
12181 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12182 for (i
= 0; i
< n_elts
; i
++)
12183 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12184 i
* GET_MODE_SIZE (inner_mode
)),
12185 XVECEXP (vals
, 0, i
));
12186 emit_move_insn (target
, mem
);
12189 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12190 ERR if it doesn't. EXP indicates the source location, which includes the
12191 inlining history for intrinsics. */
12194 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12195 const_tree exp
, const char *desc
)
12197 HOST_WIDE_INT lane
;
12199 gcc_assert (CONST_INT_P (operand
));
12201 lane
= INTVAL (operand
);
12203 if (lane
< low
|| lane
>= high
)
12206 error ("%K%s %wd out of range %wd - %wd",
12207 exp
, desc
, lane
, low
, high
- 1);
12209 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12213 /* Bounds-check lanes. */
12216 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12219 bounds_check (operand
, low
, high
, exp
, "lane");
12222 /* Bounds-check constants. */
12225 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12227 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12231 neon_element_bits (machine_mode mode
)
12233 return GET_MODE_UNIT_BITSIZE (mode
);
12237 /* Predicates for `match_operand' and `match_operator'. */
12239 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12240 WB is true if full writeback address modes are allowed and is false
12241 if limited writeback address modes (POST_INC and PRE_DEC) are
12245 arm_coproc_mem_operand (rtx op
, bool wb
)
12249 /* Reject eliminable registers. */
12250 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12251 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12252 || reg_mentioned_p (arg_pointer_rtx
, op
)
12253 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12254 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12255 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12256 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12259 /* Constants are converted into offsets from labels. */
12263 ind
= XEXP (op
, 0);
12265 if (reload_completed
12266 && (GET_CODE (ind
) == LABEL_REF
12267 || (GET_CODE (ind
) == CONST
12268 && GET_CODE (XEXP (ind
, 0)) == PLUS
12269 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12270 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12273 /* Match: (mem (reg)). */
12275 return arm_address_register_rtx_p (ind
, 0);
12277 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12278 acceptable in any case (subject to verification by
12279 arm_address_register_rtx_p). We need WB to be true to accept
12280 PRE_INC and POST_DEC. */
12281 if (GET_CODE (ind
) == POST_INC
12282 || GET_CODE (ind
) == PRE_DEC
12284 && (GET_CODE (ind
) == PRE_INC
12285 || GET_CODE (ind
) == POST_DEC
)))
12286 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12289 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12290 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12291 && GET_CODE (XEXP (ind
, 1)) == PLUS
12292 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12293 ind
= XEXP (ind
, 1);
12298 if (GET_CODE (ind
) == PLUS
12299 && REG_P (XEXP (ind
, 0))
12300 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12301 && CONST_INT_P (XEXP (ind
, 1))
12302 && INTVAL (XEXP (ind
, 1)) > -1024
12303 && INTVAL (XEXP (ind
, 1)) < 1024
12304 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12310 /* Return TRUE if OP is a memory operand which we can load or store a vector
12311 to/from. TYPE is one of the following values:
12312 0 - Vector load/stor (vldr)
12313 1 - Core registers (ldm)
12314 2 - Element/structure loads (vld1)
12317 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12321 /* Reject eliminable registers. */
12322 if (strict
&& ! (reload_in_progress
|| reload_completed
)
12323 && (reg_mentioned_p (frame_pointer_rtx
, op
)
12324 || reg_mentioned_p (arg_pointer_rtx
, op
)
12325 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12326 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12327 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12328 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12331 /* Constants are converted into offsets from labels. */
12335 ind
= XEXP (op
, 0);
12337 if (reload_completed
12338 && (GET_CODE (ind
) == LABEL_REF
12339 || (GET_CODE (ind
) == CONST
12340 && GET_CODE (XEXP (ind
, 0)) == PLUS
12341 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12342 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12345 /* Match: (mem (reg)). */
12347 return arm_address_register_rtx_p (ind
, 0);
12349 /* Allow post-increment with Neon registers. */
12350 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12351 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12352 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12354 /* Allow post-increment by register for VLDn */
12355 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12356 && GET_CODE (XEXP (ind
, 1)) == PLUS
12357 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12364 && GET_CODE (ind
) == PLUS
12365 && REG_P (XEXP (ind
, 0))
12366 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12367 && CONST_INT_P (XEXP (ind
, 1))
12368 && INTVAL (XEXP (ind
, 1)) > -1024
12369 /* For quad modes, we restrict the constant offset to be slightly less
12370 than what the instruction format permits. We have no such constraint
12371 on double mode offsets. (This must match arm_legitimate_index_p.) */
12372 && (INTVAL (XEXP (ind
, 1))
12373 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12374 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12380 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12383 neon_struct_mem_operand (rtx op
)
12387 /* Reject eliminable registers. */
12388 if (! (reload_in_progress
|| reload_completed
)
12389 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12390 || reg_mentioned_p (arg_pointer_rtx
, op
)
12391 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12392 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12393 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12394 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12397 /* Constants are converted into offsets from labels. */
12401 ind
= XEXP (op
, 0);
12403 if (reload_completed
12404 && (GET_CODE (ind
) == LABEL_REF
12405 || (GET_CODE (ind
) == CONST
12406 && GET_CODE (XEXP (ind
, 0)) == PLUS
12407 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12408 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12411 /* Match: (mem (reg)). */
12413 return arm_address_register_rtx_p (ind
, 0);
12415 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12416 if (GET_CODE (ind
) == POST_INC
12417 || GET_CODE (ind
) == PRE_DEC
)
12418 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12423 /* Return true if X is a register that will be eliminated later on. */
12425 arm_eliminable_register (rtx x
)
12427 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12428 || REGNO (x
) == ARG_POINTER_REGNUM
12429 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12430 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12433 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12434 coprocessor registers. Otherwise return NO_REGS. */
12437 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12439 if (mode
== HFmode
)
12441 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12442 return GENERAL_REGS
;
12443 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12445 return GENERAL_REGS
;
12448 /* The neon move patterns handle all legitimate vector and struct
12451 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12452 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12453 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12454 || VALID_NEON_STRUCT_MODE (mode
)))
12457 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12460 return GENERAL_REGS
;
12463 /* Values which must be returned in the most-significant end of the return
12467 arm_return_in_msb (const_tree valtype
)
12469 return (TARGET_AAPCS_BASED
12470 && BYTES_BIG_ENDIAN
12471 && (AGGREGATE_TYPE_P (valtype
)
12472 || TREE_CODE (valtype
) == COMPLEX_TYPE
12473 || FIXED_POINT_TYPE_P (valtype
)));
12476 /* Return TRUE if X references a SYMBOL_REF. */
12478 symbol_mentioned_p (rtx x
)
12483 if (GET_CODE (x
) == SYMBOL_REF
)
12486 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12487 are constant offsets, not symbols. */
12488 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12491 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12493 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12499 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12500 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12503 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12510 /* Return TRUE if X references a LABEL_REF. */
12512 label_mentioned_p (rtx x
)
12517 if (GET_CODE (x
) == LABEL_REF
)
12520 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12521 instruction, but they are constant offsets, not symbols. */
12522 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12525 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12526 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12532 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12533 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12536 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12544 tls_mentioned_p (rtx x
)
12546 switch (GET_CODE (x
))
12549 return tls_mentioned_p (XEXP (x
, 0));
12552 if (XINT (x
, 1) == UNSPEC_TLS
)
12555 /* Fall through. */
12561 /* Must not copy any rtx that uses a pc-relative address.
12562 Also, disallow copying of load-exclusive instructions that
12563 may appear after splitting of compare-and-swap-style operations
12564 so as to prevent those loops from being transformed away from their
12565 canonical forms (see PR 69904). */
12568 arm_cannot_copy_insn_p (rtx_insn
*insn
)
12570 /* The tls call insn cannot be copied, as it is paired with a data
12572 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12575 subrtx_iterator::array_type array
;
12576 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
12578 const_rtx x
= *iter
;
12579 if (GET_CODE (x
) == UNSPEC
12580 && (XINT (x
, 1) == UNSPEC_PIC_BASE
12581 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
12585 rtx set
= single_set (insn
);
12588 rtx src
= SET_SRC (set
);
12589 if (GET_CODE (src
) == ZERO_EXTEND
)
12590 src
= XEXP (src
, 0);
12592 /* Catch the load-exclusive and load-acquire operations. */
12593 if (GET_CODE (src
) == UNSPEC_VOLATILE
12594 && (XINT (src
, 1) == VUNSPEC_LL
12595 || XINT (src
, 1) == VUNSPEC_LAX
))
12602 minmax_code (rtx x
)
12604 enum rtx_code code
= GET_CODE (x
);
12617 gcc_unreachable ();
12621 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12624 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12625 int *mask
, bool *signed_sat
)
12627 /* The high bound must be a power of two minus one. */
12628 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12632 /* The low bound is either zero (for usat) or one less than the
12633 negation of the high bound (for ssat). */
12634 if (INTVAL (lo_bound
) == 0)
12639 *signed_sat
= false;
12644 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12649 *signed_sat
= true;
12657 /* Return 1 if memory locations are adjacent. */
12659 adjacent_mem_locations (rtx a
, rtx b
)
12661 /* We don't guarantee to preserve the order of these memory refs. */
12662 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12665 if ((REG_P (XEXP (a
, 0))
12666 || (GET_CODE (XEXP (a
, 0)) == PLUS
12667 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12668 && (REG_P (XEXP (b
, 0))
12669 || (GET_CODE (XEXP (b
, 0)) == PLUS
12670 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12672 HOST_WIDE_INT val0
= 0, val1
= 0;
12676 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12678 reg0
= XEXP (XEXP (a
, 0), 0);
12679 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12682 reg0
= XEXP (a
, 0);
12684 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12686 reg1
= XEXP (XEXP (b
, 0), 0);
12687 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12690 reg1
= XEXP (b
, 0);
12692 /* Don't accept any offset that will require multiple
12693 instructions to handle, since this would cause the
12694 arith_adjacentmem pattern to output an overlong sequence. */
12695 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12698 /* Don't allow an eliminable register: register elimination can make
12699 the offset too large. */
12700 if (arm_eliminable_register (reg0
))
12703 val_diff
= val1
- val0
;
12707 /* If the target has load delay slots, then there's no benefit
12708 to using an ldm instruction unless the offset is zero and
12709 we are optimizing for size. */
12710 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12711 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12712 && (val_diff
== 4 || val_diff
== -4));
12715 return ((REGNO (reg0
) == REGNO (reg1
))
12716 && (val_diff
== 4 || val_diff
== -4));
12722 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12723 for load operations, false for store operations. CONSECUTIVE is true
12724 if the register numbers in the operation must be consecutive in the register
12725 bank. RETURN_PC is true if value is to be loaded in PC.
12726 The pattern we are trying to match for load is:
12727 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12728 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12731 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12734 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12735 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12736 3. If consecutive is TRUE, then for kth register being loaded,
12737 REGNO (R_dk) = REGNO (R_d0) + k.
12738 The pattern for store is similar. */
12740 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12741 bool consecutive
, bool return_pc
)
12743 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12744 rtx reg
, mem
, addr
;
12746 unsigned first_regno
;
12747 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12749 bool addr_reg_in_reglist
= false;
12750 bool update
= false;
12755 /* If not in SImode, then registers must be consecutive
12756 (e.g., VLDM instructions for DFmode). */
12757 gcc_assert ((mode
== SImode
) || consecutive
);
12758 /* Setting return_pc for stores is illegal. */
12759 gcc_assert (!return_pc
|| load
);
12761 /* Set up the increments and the regs per val based on the mode. */
12762 reg_increment
= GET_MODE_SIZE (mode
);
12763 regs_per_val
= reg_increment
/ 4;
12764 offset_adj
= return_pc
? 1 : 0;
12767 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12768 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12771 /* Check if this is a write-back. */
12772 elt
= XVECEXP (op
, 0, offset_adj
);
12773 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12779 /* The offset adjustment must be the number of registers being
12780 popped times the size of a single register. */
12781 if (!REG_P (SET_DEST (elt
))
12782 || !REG_P (XEXP (SET_SRC (elt
), 0))
12783 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12784 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12785 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12786 ((count
- 1 - offset_adj
) * reg_increment
))
12790 i
= i
+ offset_adj
;
12791 base
= base
+ offset_adj
;
12792 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12793 success depends on the type: VLDM can do just one reg,
12794 LDM must do at least two. */
12795 if ((count
<= i
) && (mode
== SImode
))
12798 elt
= XVECEXP (op
, 0, i
- 1);
12799 if (GET_CODE (elt
) != SET
)
12804 reg
= SET_DEST (elt
);
12805 mem
= SET_SRC (elt
);
12809 reg
= SET_SRC (elt
);
12810 mem
= SET_DEST (elt
);
12813 if (!REG_P (reg
) || !MEM_P (mem
))
12816 regno
= REGNO (reg
);
12817 first_regno
= regno
;
12818 addr
= XEXP (mem
, 0);
12819 if (GET_CODE (addr
) == PLUS
)
12821 if (!CONST_INT_P (XEXP (addr
, 1)))
12824 offset
= INTVAL (XEXP (addr
, 1));
12825 addr
= XEXP (addr
, 0);
12831 /* Don't allow SP to be loaded unless it is also the base register. It
12832 guarantees that SP is reset correctly when an LDM instruction
12833 is interrupted. Otherwise, we might end up with a corrupt stack. */
12834 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12837 for (; i
< count
; i
++)
12839 elt
= XVECEXP (op
, 0, i
);
12840 if (GET_CODE (elt
) != SET
)
12845 reg
= SET_DEST (elt
);
12846 mem
= SET_SRC (elt
);
12850 reg
= SET_SRC (elt
);
12851 mem
= SET_DEST (elt
);
12855 || GET_MODE (reg
) != mode
12856 || REGNO (reg
) <= regno
12859 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12860 /* Don't allow SP to be loaded unless it is also the base register. It
12861 guarantees that SP is reset correctly when an LDM instruction
12862 is interrupted. Otherwise, we might end up with a corrupt stack. */
12863 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12865 || GET_MODE (mem
) != mode
12866 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12867 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12868 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
12869 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
12870 offset
+ (i
- base
) * reg_increment
))
12871 && (!REG_P (XEXP (mem
, 0))
12872 || offset
+ (i
- base
) * reg_increment
!= 0)))
12875 regno
= REGNO (reg
);
12876 if (regno
== REGNO (addr
))
12877 addr_reg_in_reglist
= true;
12882 if (update
&& addr_reg_in_reglist
)
12885 /* For Thumb-1, address register is always modified - either by write-back
12886 or by explicit load. If the pattern does not describe an update,
12887 then the address register must be in the list of loaded registers. */
12889 return update
|| addr_reg_in_reglist
;
12895 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12896 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12897 instruction. ADD_OFFSET is nonzero if the base address register needs
12898 to be modified with an add instruction before we can use it. */
12901 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
12902 int nops
, HOST_WIDE_INT add_offset
)
12904 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12905 if the offset isn't small enough. The reason 2 ldrs are faster
12906 is because these ARMs are able to do more than one cache access
12907 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12908 whilst the ARM8 has a double bandwidth cache. This means that
12909 these cores can do both an instruction fetch and a data fetch in
12910 a single cycle, so the trick of calculating the address into a
12911 scratch register (one of the result regs) and then doing a load
12912 multiple actually becomes slower (and no smaller in code size).
12913 That is the transformation
12915 ldr rd1, [rbase + offset]
12916 ldr rd2, [rbase + offset + 4]
12920 add rd1, rbase, offset
12921 ldmia rd1, {rd1, rd2}
12923 produces worse code -- '3 cycles + any stalls on rd2' instead of
12924 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12925 access per cycle, the first sequence could never complete in less
12926 than 6 cycles, whereas the ldm sequence would only take 5 and
12927 would make better use of sequential accesses if not hitting the
12930 We cheat here and test 'arm_ld_sched' which we currently know to
12931 only be true for the ARM8, ARM9 and StrongARM. If this ever
12932 changes, then the test below needs to be reworked. */
12933 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
12936 /* XScale has load-store double instructions, but they have stricter
12937 alignment requirements than load-store multiple, so we cannot
12940 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12941 the pipeline until completion.
12949 An ldr instruction takes 1-3 cycles, but does not block the
12958 Best case ldr will always win. However, the more ldr instructions
12959 we issue, the less likely we are to be able to schedule them well.
12960 Using ldr instructions also increases code size.
12962 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12963 for counts of 3 or 4 regs. */
12964 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
12969 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12970 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12971 an array ORDER which describes the sequence to use when accessing the
12972 offsets that produces an ascending order. In this sequence, each
12973 offset must be larger by exactly 4 than the previous one. ORDER[0]
12974 must have been filled in with the lowest offset by the caller.
12975 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12976 we use to verify that ORDER produces an ascending order of registers.
12977 Return true if it was possible to construct such an order, false if
12981 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
12982 int *unsorted_regs
)
12985 for (i
= 1; i
< nops
; i
++)
12989 order
[i
] = order
[i
- 1];
12990 for (j
= 0; j
< nops
; j
++)
12991 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
12993 /* We must find exactly one offset that is higher than the
12994 previous one by 4. */
12995 if (order
[i
] != order
[i
- 1])
12999 if (order
[i
] == order
[i
- 1])
13001 /* The register numbers must be ascending. */
13002 if (unsorted_regs
!= NULL
13003 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13009 /* Used to determine in a peephole whether a sequence of load
13010 instructions can be changed into a load-multiple instruction.
13011 NOPS is the number of separate load instructions we are examining. The
13012 first NOPS entries in OPERANDS are the destination registers, the
13013 next NOPS entries are memory operands. If this function is
13014 successful, *BASE is set to the common base register of the memory
13015 accesses; *LOAD_OFFSET is set to the first memory location's offset
13016 from that base register.
13017 REGS is an array filled in with the destination register numbers.
13018 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13019 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13020 the sequence of registers in REGS matches the loads from ascending memory
13021 locations, and the function verifies that the register numbers are
13022 themselves ascending. If CHECK_REGS is false, the register numbers
13023 are stored in the order they are found in the operands. */
13025 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13026 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13028 int unsorted_regs
[MAX_LDM_STM_OPS
];
13029 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13030 int order
[MAX_LDM_STM_OPS
];
13031 rtx base_reg_rtx
= NULL
;
13035 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13036 easily extended if required. */
13037 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13039 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13041 /* Loop over the operands and check that the memory references are
13042 suitable (i.e. immediate offsets from the same base register). At
13043 the same time, extract the target register, and the memory
13045 for (i
= 0; i
< nops
; i
++)
13050 /* Convert a subreg of a mem into the mem itself. */
13051 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13052 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13054 gcc_assert (MEM_P (operands
[nops
+ i
]));
13056 /* Don't reorder volatile memory references; it doesn't seem worth
13057 looking for the case where the order is ok anyway. */
13058 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13061 offset
= const0_rtx
;
13063 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13064 || (GET_CODE (reg
) == SUBREG
13065 && REG_P (reg
= SUBREG_REG (reg
))))
13066 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13067 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13068 || (GET_CODE (reg
) == SUBREG
13069 && REG_P (reg
= SUBREG_REG (reg
))))
13070 && (CONST_INT_P (offset
13071 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13075 base_reg
= REGNO (reg
);
13076 base_reg_rtx
= reg
;
13077 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13080 else if (base_reg
!= (int) REGNO (reg
))
13081 /* Not addressed from the same base register. */
13084 unsorted_regs
[i
] = (REG_P (operands
[i
])
13085 ? REGNO (operands
[i
])
13086 : REGNO (SUBREG_REG (operands
[i
])));
13088 /* If it isn't an integer register, or if it overwrites the
13089 base register but isn't the last insn in the list, then
13090 we can't do this. */
13091 if (unsorted_regs
[i
] < 0
13092 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13093 || unsorted_regs
[i
] > 14
13094 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13097 /* Don't allow SP to be loaded unless it is also the base
13098 register. It guarantees that SP is reset correctly when
13099 an LDM instruction is interrupted. Otherwise, we might
13100 end up with a corrupt stack. */
13101 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13104 unsorted_offsets
[i
] = INTVAL (offset
);
13105 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13109 /* Not a suitable memory address. */
13113 /* All the useful information has now been extracted from the
13114 operands into unsorted_regs and unsorted_offsets; additionally,
13115 order[0] has been set to the lowest offset in the list. Sort
13116 the offsets into order, verifying that they are adjacent, and
13117 check that the register numbers are ascending. */
13118 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13119 check_regs
? unsorted_regs
: NULL
))
13123 memcpy (saved_order
, order
, sizeof order
);
13129 for (i
= 0; i
< nops
; i
++)
13130 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13132 *load_offset
= unsorted_offsets
[order
[0]];
13136 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13139 if (unsorted_offsets
[order
[0]] == 0)
13140 ldm_case
= 1; /* ldmia */
13141 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13142 ldm_case
= 2; /* ldmib */
13143 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13144 ldm_case
= 3; /* ldmda */
13145 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13146 ldm_case
= 4; /* ldmdb */
13147 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13148 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13153 if (!multiple_operation_profitable_p (false, nops
,
13155 ? unsorted_offsets
[order
[0]] : 0))
13161 /* Used to determine in a peephole whether a sequence of store instructions can
13162 be changed into a store-multiple instruction.
13163 NOPS is the number of separate store instructions we are examining.
13164 NOPS_TOTAL is the total number of instructions recognized by the peephole
13166 The first NOPS entries in OPERANDS are the source registers, the next
13167 NOPS entries are memory operands. If this function is successful, *BASE is
13168 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13169 to the first memory location's offset from that base register. REGS is an
13170 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13171 likewise filled with the corresponding rtx's.
13172 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13173 numbers to an ascending order of stores.
13174 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13175 from ascending memory locations, and the function verifies that the register
13176 numbers are themselves ascending. If CHECK_REGS is false, the register
13177 numbers are stored in the order they are found in the operands. */
13179 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13180 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13181 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13183 int unsorted_regs
[MAX_LDM_STM_OPS
];
13184 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13185 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13186 int order
[MAX_LDM_STM_OPS
];
13188 rtx base_reg_rtx
= NULL
;
13191 /* Write back of base register is currently only supported for Thumb 1. */
13192 int base_writeback
= TARGET_THUMB1
;
13194 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13195 easily extended if required. */
13196 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13198 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13200 /* Loop over the operands and check that the memory references are
13201 suitable (i.e. immediate offsets from the same base register). At
13202 the same time, extract the target register, and the memory
13204 for (i
= 0; i
< nops
; i
++)
13209 /* Convert a subreg of a mem into the mem itself. */
13210 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13211 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13213 gcc_assert (MEM_P (operands
[nops
+ i
]));
13215 /* Don't reorder volatile memory references; it doesn't seem worth
13216 looking for the case where the order is ok anyway. */
13217 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13220 offset
= const0_rtx
;
13222 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13223 || (GET_CODE (reg
) == SUBREG
13224 && REG_P (reg
= SUBREG_REG (reg
))))
13225 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13226 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13227 || (GET_CODE (reg
) == SUBREG
13228 && REG_P (reg
= SUBREG_REG (reg
))))
13229 && (CONST_INT_P (offset
13230 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13232 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13233 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13234 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13238 base_reg
= REGNO (reg
);
13239 base_reg_rtx
= reg
;
13240 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13243 else if (base_reg
!= (int) REGNO (reg
))
13244 /* Not addressed from the same base register. */
13247 /* If it isn't an integer register, then we can't do this. */
13248 if (unsorted_regs
[i
] < 0
13249 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13250 /* The effects are unpredictable if the base register is
13251 both updated and stored. */
13252 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13253 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13254 || unsorted_regs
[i
] > 14)
13257 unsorted_offsets
[i
] = INTVAL (offset
);
13258 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13262 /* Not a suitable memory address. */
13266 /* All the useful information has now been extracted from the
13267 operands into unsorted_regs and unsorted_offsets; additionally,
13268 order[0] has been set to the lowest offset in the list. Sort
13269 the offsets into order, verifying that they are adjacent, and
13270 check that the register numbers are ascending. */
13271 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13272 check_regs
? unsorted_regs
: NULL
))
13276 memcpy (saved_order
, order
, sizeof order
);
13282 for (i
= 0; i
< nops
; i
++)
13284 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13286 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13289 *load_offset
= unsorted_offsets
[order
[0]];
13293 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13296 if (unsorted_offsets
[order
[0]] == 0)
13297 stm_case
= 1; /* stmia */
13298 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13299 stm_case
= 2; /* stmib */
13300 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13301 stm_case
= 3; /* stmda */
13302 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13303 stm_case
= 4; /* stmdb */
13307 if (!multiple_operation_profitable_p (false, nops
, 0))
13313 /* Routines for use in generating RTL. */
13315 /* Generate a load-multiple instruction. COUNT is the number of loads in
13316 the instruction; REGS and MEMS are arrays containing the operands.
13317 BASEREG is the base register to be used in addressing the memory operands.
13318 WBACK_OFFSET is nonzero if the instruction should update the base
13322 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13323 HOST_WIDE_INT wback_offset
)
13328 if (!multiple_operation_profitable_p (false, count
, 0))
13334 for (i
= 0; i
< count
; i
++)
13335 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13337 if (wback_offset
!= 0)
13338 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13340 seq
= get_insns ();
13346 result
= gen_rtx_PARALLEL (VOIDmode
,
13347 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13348 if (wback_offset
!= 0)
13350 XVECEXP (result
, 0, 0)
13351 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13356 for (j
= 0; i
< count
; i
++, j
++)
13357 XVECEXP (result
, 0, i
)
13358 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13363 /* Generate a store-multiple instruction. COUNT is the number of stores in
13364 the instruction; REGS and MEMS are arrays containing the operands.
13365 BASEREG is the base register to be used in addressing the memory operands.
13366 WBACK_OFFSET is nonzero if the instruction should update the base
13370 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13371 HOST_WIDE_INT wback_offset
)
13376 if (GET_CODE (basereg
) == PLUS
)
13377 basereg
= XEXP (basereg
, 0);
13379 if (!multiple_operation_profitable_p (false, count
, 0))
13385 for (i
= 0; i
< count
; i
++)
13386 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13388 if (wback_offset
!= 0)
13389 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13391 seq
= get_insns ();
13397 result
= gen_rtx_PARALLEL (VOIDmode
,
13398 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13399 if (wback_offset
!= 0)
13401 XVECEXP (result
, 0, 0)
13402 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13407 for (j
= 0; i
< count
; i
++, j
++)
13408 XVECEXP (result
, 0, i
)
13409 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13414 /* Generate either a load-multiple or a store-multiple instruction. This
13415 function can be used in situations where we can start with a single MEM
13416 rtx and adjust its address upwards.
13417 COUNT is the number of operations in the instruction, not counting a
13418 possible update of the base register. REGS is an array containing the
13420 BASEREG is the base register to be used in addressing the memory operands,
13421 which are constructed from BASEMEM.
13422 WRITE_BACK specifies whether the generated instruction should include an
13423 update of the base register.
13424 OFFSETP is used to pass an offset to and from this function; this offset
13425 is not used when constructing the address (instead BASEMEM should have an
13426 appropriate offset in its address), it is used only for setting
13427 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13430 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13431 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13433 rtx mems
[MAX_LDM_STM_OPS
];
13434 HOST_WIDE_INT offset
= *offsetp
;
13437 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13439 if (GET_CODE (basereg
) == PLUS
)
13440 basereg
= XEXP (basereg
, 0);
13442 for (i
= 0; i
< count
; i
++)
13444 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13445 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13453 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13454 write_back
? 4 * count
: 0);
13456 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13457 write_back
? 4 * count
: 0);
13461 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13462 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13464 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13469 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13470 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13472 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13476 /* Called from a peephole2 expander to turn a sequence of loads into an
13477 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13478 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13479 is true if we can reorder the registers because they are used commutatively
13481 Returns true iff we could generate a new instruction. */
13484 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13486 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13487 rtx mems
[MAX_LDM_STM_OPS
];
13488 int i
, j
, base_reg
;
13490 HOST_WIDE_INT offset
;
13491 int write_back
= FALSE
;
13495 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13496 &base_reg
, &offset
, !sort_regs
);
13502 for (i
= 0; i
< nops
- 1; i
++)
13503 for (j
= i
+ 1; j
< nops
; j
++)
13504 if (regs
[i
] > regs
[j
])
13510 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13514 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13515 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13521 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13522 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13524 if (!TARGET_THUMB1
)
13526 base_reg
= regs
[0];
13527 base_reg_rtx
= newbase
;
13531 for (i
= 0; i
< nops
; i
++)
13533 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13534 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13537 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13538 write_back
? offset
+ i
* 4 : 0));
13542 /* Called from a peephole2 expander to turn a sequence of stores into an
13543 STM instruction. OPERANDS are the operands found by the peephole matcher;
13544 NOPS indicates how many separate stores we are trying to combine.
13545 Returns true iff we could generate a new instruction. */
13548 gen_stm_seq (rtx
*operands
, int nops
)
13551 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13552 rtx mems
[MAX_LDM_STM_OPS
];
13555 HOST_WIDE_INT offset
;
13556 int write_back
= FALSE
;
13559 bool base_reg_dies
;
13561 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13562 mem_order
, &base_reg
, &offset
, true);
13567 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13569 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13572 gcc_assert (base_reg_dies
);
13578 gcc_assert (base_reg_dies
);
13579 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13583 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13585 for (i
= 0; i
< nops
; i
++)
13587 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13588 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13591 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13592 write_back
? offset
+ i
* 4 : 0));
13596 /* Called from a peephole2 expander to turn a sequence of stores that are
13597 preceded by constant loads into an STM instruction. OPERANDS are the
13598 operands found by the peephole matcher; NOPS indicates how many
13599 separate stores we are trying to combine; there are 2 * NOPS
13600 instructions in the peephole.
13601 Returns true iff we could generate a new instruction. */
13604 gen_const_stm_seq (rtx
*operands
, int nops
)
13606 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13607 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13608 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13609 rtx mems
[MAX_LDM_STM_OPS
];
13612 HOST_WIDE_INT offset
;
13613 int write_back
= FALSE
;
13616 bool base_reg_dies
;
13618 HARD_REG_SET allocated
;
13620 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13621 mem_order
, &base_reg
, &offset
, false);
13626 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13628 /* If the same register is used more than once, try to find a free
13630 CLEAR_HARD_REG_SET (allocated
);
13631 for (i
= 0; i
< nops
; i
++)
13633 for (j
= i
+ 1; j
< nops
; j
++)
13634 if (regs
[i
] == regs
[j
])
13636 rtx t
= peep2_find_free_register (0, nops
* 2,
13637 TARGET_THUMB1
? "l" : "r",
13638 SImode
, &allocated
);
13642 regs
[i
] = REGNO (t
);
13646 /* Compute an ordering that maps the register numbers to an ascending
13649 for (i
= 0; i
< nops
; i
++)
13650 if (regs
[i
] < regs
[reg_order
[0]])
13653 for (i
= 1; i
< nops
; i
++)
13655 int this_order
= reg_order
[i
- 1];
13656 for (j
= 0; j
< nops
; j
++)
13657 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13658 && (this_order
== reg_order
[i
- 1]
13659 || regs
[j
] < regs
[this_order
]))
13661 reg_order
[i
] = this_order
;
13664 /* Ensure that registers that must be live after the instruction end
13665 up with the correct value. */
13666 for (i
= 0; i
< nops
; i
++)
13668 int this_order
= reg_order
[i
];
13669 if ((this_order
!= mem_order
[i
]
13670 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13671 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13675 /* Load the constants. */
13676 for (i
= 0; i
< nops
; i
++)
13678 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13679 sorted_regs
[i
] = regs
[reg_order
[i
]];
13680 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13683 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13685 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13688 gcc_assert (base_reg_dies
);
13694 gcc_assert (base_reg_dies
);
13695 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13699 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13701 for (i
= 0; i
< nops
; i
++)
13703 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13704 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13707 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13708 write_back
? offset
+ i
* 4 : 0));
13712 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13713 unaligned copies on processors which support unaligned semantics for those
13714 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13715 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13716 An interleave factor of 1 (the minimum) will perform no interleaving.
13717 Load/store multiple are used for aligned addresses where possible. */
13720 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13721 HOST_WIDE_INT length
,
13722 unsigned int interleave_factor
)
13724 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13725 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13726 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13727 HOST_WIDE_INT i
, j
;
13728 HOST_WIDE_INT remaining
= length
, words
;
13729 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13731 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13732 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13733 HOST_WIDE_INT srcoffset
, dstoffset
;
13734 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13737 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13739 /* Use hard registers if we have aligned source or destination so we can use
13740 load/store multiple with contiguous registers. */
13741 if (dst_aligned
|| src_aligned
)
13742 for (i
= 0; i
< interleave_factor
; i
++)
13743 regs
[i
] = gen_rtx_REG (SImode
, i
);
13745 for (i
= 0; i
< interleave_factor
; i
++)
13746 regs
[i
] = gen_reg_rtx (SImode
);
13748 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13749 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13751 srcoffset
= dstoffset
= 0;
13753 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13754 For copying the last bytes we want to subtract this offset again. */
13755 src_autoinc
= dst_autoinc
= 0;
13757 for (i
= 0; i
< interleave_factor
; i
++)
13760 /* Copy BLOCK_SIZE_BYTES chunks. */
13762 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13765 if (src_aligned
&& interleave_factor
> 1)
13767 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13768 TRUE
, srcbase
, &srcoffset
));
13769 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13773 for (j
= 0; j
< interleave_factor
; j
++)
13775 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13777 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13778 srcoffset
+ j
* UNITS_PER_WORD
);
13779 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13781 srcoffset
+= block_size_bytes
;
13785 if (dst_aligned
&& interleave_factor
> 1)
13787 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13788 TRUE
, dstbase
, &dstoffset
));
13789 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13793 for (j
= 0; j
< interleave_factor
; j
++)
13795 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13797 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13798 dstoffset
+ j
* UNITS_PER_WORD
);
13799 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13801 dstoffset
+= block_size_bytes
;
13804 remaining
-= block_size_bytes
;
13807 /* Copy any whole words left (note these aren't interleaved with any
13808 subsequent halfword/byte load/stores in the interests of simplicity). */
13810 words
= remaining
/ UNITS_PER_WORD
;
13812 gcc_assert (words
< interleave_factor
);
13814 if (src_aligned
&& words
> 1)
13816 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13818 src_autoinc
+= UNITS_PER_WORD
* words
;
13822 for (j
= 0; j
< words
; j
++)
13824 addr
= plus_constant (Pmode
, src
,
13825 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13826 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13827 srcoffset
+ j
* UNITS_PER_WORD
);
13829 emit_move_insn (regs
[j
], mem
);
13831 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13833 srcoffset
+= words
* UNITS_PER_WORD
;
13836 if (dst_aligned
&& words
> 1)
13838 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
13840 dst_autoinc
+= words
* UNITS_PER_WORD
;
13844 for (j
= 0; j
< words
; j
++)
13846 addr
= plus_constant (Pmode
, dst
,
13847 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
13848 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13849 dstoffset
+ j
* UNITS_PER_WORD
);
13851 emit_move_insn (mem
, regs
[j
]);
13853 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13855 dstoffset
+= words
* UNITS_PER_WORD
;
13858 remaining
-= words
* UNITS_PER_WORD
;
13860 gcc_assert (remaining
< 4);
13862 /* Copy a halfword if necessary. */
13864 if (remaining
>= 2)
13866 halfword_tmp
= gen_reg_rtx (SImode
);
13868 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13869 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
13870 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
13872 /* Either write out immediately, or delay until we've loaded the last
13873 byte, depending on interleave factor. */
13874 if (interleave_factor
== 1)
13876 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13877 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13878 emit_insn (gen_unaligned_storehi (mem
,
13879 gen_lowpart (HImode
, halfword_tmp
)));
13880 halfword_tmp
= NULL
;
13888 gcc_assert (remaining
< 2);
13890 /* Copy last byte. */
13892 if ((remaining
& 1) != 0)
13894 byte_tmp
= gen_reg_rtx (SImode
);
13896 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13897 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
13898 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
13900 if (interleave_factor
== 1)
13902 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13903 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13904 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13913 /* Store last halfword if we haven't done so already. */
13917 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13918 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13919 emit_insn (gen_unaligned_storehi (mem
,
13920 gen_lowpart (HImode
, halfword_tmp
)));
13924 /* Likewise for last byte. */
13928 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13929 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13930 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13934 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
13937 /* From mips_adjust_block_mem:
13939 Helper function for doing a loop-based block operation on memory
13940 reference MEM. Each iteration of the loop will operate on LENGTH
13943 Create a new base register for use within the loop and point it to
13944 the start of MEM. Create a new memory reference that uses this
13945 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13948 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
13951 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
13953 /* Although the new mem does not refer to a known location,
13954 it does keep up to LENGTH bytes of alignment. */
13955 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
13956 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
13959 /* From mips_block_move_loop:
13961 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13962 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13963 the memory regions do not overlap. */
13966 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
13967 unsigned int interleave_factor
,
13968 HOST_WIDE_INT bytes_per_iter
)
13970 rtx src_reg
, dest_reg
, final_src
, test
;
13971 HOST_WIDE_INT leftover
;
13973 leftover
= length
% bytes_per_iter
;
13974 length
-= leftover
;
13976 /* Create registers and memory references for use within the loop. */
13977 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
13978 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
13980 /* Calculate the value that SRC_REG should have after the last iteration of
13982 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
13983 0, 0, OPTAB_WIDEN
);
13985 /* Emit the start of the loop. */
13986 rtx_code_label
*label
= gen_label_rtx ();
13987 emit_label (label
);
13989 /* Emit the loop body. */
13990 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
13991 interleave_factor
);
13993 /* Move on to the next block. */
13994 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
13995 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
13997 /* Emit the loop condition. */
13998 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
13999 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14001 /* Mop up any left-over bytes. */
14003 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14006 /* Emit a block move when either the source or destination is unaligned (not
14007 aligned to a four-byte boundary). This may need further tuning depending on
14008 core type, optimize_size setting, etc. */
14011 arm_movmemqi_unaligned (rtx
*operands
)
14013 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14017 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14018 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14019 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14020 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14021 or dst_aligned though: allow more interleaving in those cases since the
14022 resulting code can be smaller. */
14023 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14024 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14027 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14028 interleave_factor
, bytes_per_iter
);
14030 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14031 interleave_factor
);
14035 /* Note that the loop created by arm_block_move_unaligned_loop may be
14036 subject to loop unrolling, which makes tuning this condition a little
14039 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14041 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14048 arm_gen_movmemqi (rtx
*operands
)
14050 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14051 HOST_WIDE_INT srcoffset
, dstoffset
;
14053 rtx src
, dst
, srcbase
, dstbase
;
14054 rtx part_bytes_reg
= NULL
;
14057 if (!CONST_INT_P (operands
[2])
14058 || !CONST_INT_P (operands
[3])
14059 || INTVAL (operands
[2]) > 64)
14062 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14063 return arm_movmemqi_unaligned (operands
);
14065 if (INTVAL (operands
[3]) & 3)
14068 dstbase
= operands
[0];
14069 srcbase
= operands
[1];
14071 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14072 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14074 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14075 out_words_to_go
= INTVAL (operands
[2]) / 4;
14076 last_bytes
= INTVAL (operands
[2]) & 3;
14077 dstoffset
= srcoffset
= 0;
14079 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14080 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14082 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14084 if (in_words_to_go
> 4)
14085 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14086 TRUE
, srcbase
, &srcoffset
));
14088 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14089 src
, FALSE
, srcbase
,
14092 if (out_words_to_go
)
14094 if (out_words_to_go
> 4)
14095 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14096 TRUE
, dstbase
, &dstoffset
));
14097 else if (out_words_to_go
!= 1)
14098 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14099 out_words_to_go
, dst
,
14102 dstbase
, &dstoffset
));
14105 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14106 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14107 if (last_bytes
!= 0)
14109 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14115 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14116 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14119 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14120 if (out_words_to_go
)
14124 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14125 sreg
= copy_to_reg (mem
);
14127 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14128 emit_move_insn (mem
, sreg
);
14131 gcc_assert (!in_words_to_go
); /* Sanity check */
14134 if (in_words_to_go
)
14136 gcc_assert (in_words_to_go
> 0);
14138 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14139 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14142 gcc_assert (!last_bytes
|| part_bytes_reg
);
14144 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14146 rtx tmp
= gen_reg_rtx (SImode
);
14148 /* The bytes we want are in the top end of the word. */
14149 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14150 GEN_INT (8 * (4 - last_bytes
))));
14151 part_bytes_reg
= tmp
;
14155 mem
= adjust_automodify_address (dstbase
, QImode
,
14156 plus_constant (Pmode
, dst
,
14158 dstoffset
+ last_bytes
- 1);
14159 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14163 tmp
= gen_reg_rtx (SImode
);
14164 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14165 part_bytes_reg
= tmp
;
14172 if (last_bytes
> 1)
14174 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14175 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14179 rtx tmp
= gen_reg_rtx (SImode
);
14180 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14181 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14182 part_bytes_reg
= tmp
;
14189 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14190 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14197 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14200 next_consecutive_mem (rtx mem
)
14202 machine_mode mode
= GET_MODE (mem
);
14203 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14204 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14206 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14209 /* Copy using LDRD/STRD instructions whenever possible.
14210 Returns true upon success. */
14212 gen_movmem_ldrd_strd (rtx
*operands
)
14214 unsigned HOST_WIDE_INT len
;
14215 HOST_WIDE_INT align
;
14216 rtx src
, dst
, base
;
14218 bool src_aligned
, dst_aligned
;
14219 bool src_volatile
, dst_volatile
;
14221 gcc_assert (CONST_INT_P (operands
[2]));
14222 gcc_assert (CONST_INT_P (operands
[3]));
14224 len
= UINTVAL (operands
[2]);
14228 /* Maximum alignment we can assume for both src and dst buffers. */
14229 align
= INTVAL (operands
[3]);
14231 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14234 /* Place src and dst addresses in registers
14235 and update the corresponding mem rtx. */
14237 dst_volatile
= MEM_VOLATILE_P (dst
);
14238 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14239 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14240 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14243 src_volatile
= MEM_VOLATILE_P (src
);
14244 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14245 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14246 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14248 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14251 if (src_volatile
|| dst_volatile
)
14254 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14255 if (!(dst_aligned
|| src_aligned
))
14256 return arm_gen_movmemqi (operands
);
14258 /* If the either src or dst is unaligned we'll be accessing it as pairs
14259 of unaligned SImode accesses. Otherwise we can generate DImode
14260 ldrd/strd instructions. */
14261 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14262 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
14267 reg0
= gen_reg_rtx (DImode
);
14268 rtx low_reg
= NULL_RTX
;
14269 rtx hi_reg
= NULL_RTX
;
14271 if (!src_aligned
|| !dst_aligned
)
14273 low_reg
= gen_lowpart (SImode
, reg0
);
14274 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14277 emit_move_insn (reg0
, src
);
14280 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14281 src
= next_consecutive_mem (src
);
14282 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
14286 emit_move_insn (dst
, reg0
);
14289 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
14290 dst
= next_consecutive_mem (dst
);
14291 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
14294 src
= next_consecutive_mem (src
);
14295 dst
= next_consecutive_mem (dst
);
14298 gcc_assert (len
< 8);
14301 /* More than a word but less than a double-word to copy. Copy a word. */
14302 reg0
= gen_reg_rtx (SImode
);
14303 src
= adjust_address (src
, SImode
, 0);
14304 dst
= adjust_address (dst
, SImode
, 0);
14306 emit_move_insn (reg0
, src
);
14308 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14311 emit_move_insn (dst
, reg0
);
14313 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14315 src
= next_consecutive_mem (src
);
14316 dst
= next_consecutive_mem (dst
);
14323 /* Copy the remaining bytes. */
14326 dst
= adjust_address (dst
, HImode
, 0);
14327 src
= adjust_address (src
, HImode
, 0);
14328 reg0
= gen_reg_rtx (SImode
);
14330 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14332 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14335 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14337 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14339 src
= next_consecutive_mem (src
);
14340 dst
= next_consecutive_mem (dst
);
14345 dst
= adjust_address (dst
, QImode
, 0);
14346 src
= adjust_address (src
, QImode
, 0);
14347 reg0
= gen_reg_rtx (QImode
);
14348 emit_move_insn (reg0
, src
);
14349 emit_move_insn (dst
, reg0
);
14353 /* Select a dominance comparison mode if possible for a test of the general
14354 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14355 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14356 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14357 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14358 In all cases OP will be either EQ or NE, but we don't need to know which
14359 here. If we are unable to support a dominance comparison we return
14360 CC mode. This will then fail to match for the RTL expressions that
14361 generate this call. */
14363 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14365 enum rtx_code cond1
, cond2
;
14368 /* Currently we will probably get the wrong result if the individual
14369 comparisons are not simple. This also ensures that it is safe to
14370 reverse a comparison if necessary. */
14371 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14373 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14377 /* The if_then_else variant of this tests the second condition if the
14378 first passes, but is true if the first fails. Reverse the first
14379 condition to get a true "inclusive-or" expression. */
14380 if (cond_or
== DOM_CC_NX_OR_Y
)
14381 cond1
= reverse_condition (cond1
);
14383 /* If the comparisons are not equal, and one doesn't dominate the other,
14384 then we can't do this. */
14386 && !comparison_dominates_p (cond1
, cond2
)
14387 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14391 std::swap (cond1
, cond2
);
14396 if (cond_or
== DOM_CC_X_AND_Y
)
14401 case EQ
: return CC_DEQmode
;
14402 case LE
: return CC_DLEmode
;
14403 case LEU
: return CC_DLEUmode
;
14404 case GE
: return CC_DGEmode
;
14405 case GEU
: return CC_DGEUmode
;
14406 default: gcc_unreachable ();
14410 if (cond_or
== DOM_CC_X_AND_Y
)
14422 gcc_unreachable ();
14426 if (cond_or
== DOM_CC_X_AND_Y
)
14438 gcc_unreachable ();
14442 if (cond_or
== DOM_CC_X_AND_Y
)
14443 return CC_DLTUmode
;
14448 return CC_DLTUmode
;
14450 return CC_DLEUmode
;
14454 gcc_unreachable ();
14458 if (cond_or
== DOM_CC_X_AND_Y
)
14459 return CC_DGTUmode
;
14464 return CC_DGTUmode
;
14466 return CC_DGEUmode
;
14470 gcc_unreachable ();
14473 /* The remaining cases only occur when both comparisons are the
14476 gcc_assert (cond1
== cond2
);
14480 gcc_assert (cond1
== cond2
);
14484 gcc_assert (cond1
== cond2
);
14488 gcc_assert (cond1
== cond2
);
14489 return CC_DLEUmode
;
14492 gcc_assert (cond1
== cond2
);
14493 return CC_DGEUmode
;
14496 gcc_unreachable ();
14501 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14503 /* All floating point compares return CCFP if it is an equality
14504 comparison, and CCFPE otherwise. */
14505 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14528 gcc_unreachable ();
14532 /* A compare with a shifted operand. Because of canonicalization, the
14533 comparison will have to be swapped when we emit the assembler. */
14534 if (GET_MODE (y
) == SImode
14535 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14536 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14537 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14538 || GET_CODE (x
) == ROTATERT
))
14541 /* This operation is performed swapped, but since we only rely on the Z
14542 flag we don't need an additional mode. */
14543 if (GET_MODE (y
) == SImode
14544 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14545 && GET_CODE (x
) == NEG
14546 && (op
== EQ
|| op
== NE
))
14549 /* This is a special case that is used by combine to allow a
14550 comparison of a shifted byte load to be split into a zero-extend
14551 followed by a comparison of the shifted integer (only valid for
14552 equalities and unsigned inequalities). */
14553 if (GET_MODE (x
) == SImode
14554 && GET_CODE (x
) == ASHIFT
14555 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14556 && GET_CODE (XEXP (x
, 0)) == SUBREG
14557 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14558 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14559 && (op
== EQ
|| op
== NE
14560 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14561 && CONST_INT_P (y
))
14564 /* A construct for a conditional compare, if the false arm contains
14565 0, then both conditions must be true, otherwise either condition
14566 must be true. Not all conditions are possible, so CCmode is
14567 returned if it can't be done. */
14568 if (GET_CODE (x
) == IF_THEN_ELSE
14569 && (XEXP (x
, 2) == const0_rtx
14570 || XEXP (x
, 2) == const1_rtx
)
14571 && COMPARISON_P (XEXP (x
, 0))
14572 && COMPARISON_P (XEXP (x
, 1)))
14573 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14574 INTVAL (XEXP (x
, 2)));
14576 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14577 if (GET_CODE (x
) == AND
14578 && (op
== EQ
|| op
== NE
)
14579 && COMPARISON_P (XEXP (x
, 0))
14580 && COMPARISON_P (XEXP (x
, 1)))
14581 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14584 if (GET_CODE (x
) == IOR
14585 && (op
== EQ
|| op
== NE
)
14586 && COMPARISON_P (XEXP (x
, 0))
14587 && COMPARISON_P (XEXP (x
, 1)))
14588 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14591 /* An operation (on Thumb) where we want to test for a single bit.
14592 This is done by shifting that bit up into the top bit of a
14593 scratch register; we can then branch on the sign bit. */
14595 && GET_MODE (x
) == SImode
14596 && (op
== EQ
|| op
== NE
)
14597 && GET_CODE (x
) == ZERO_EXTRACT
14598 && XEXP (x
, 1) == const1_rtx
)
14601 /* An operation that sets the condition codes as a side-effect, the
14602 V flag is not set correctly, so we can only use comparisons where
14603 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14605 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14606 if (GET_MODE (x
) == SImode
14608 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14609 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14610 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14611 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14612 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14613 || GET_CODE (x
) == LSHIFTRT
14614 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14615 || GET_CODE (x
) == ROTATERT
14616 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14617 return CC_NOOVmode
;
14619 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14622 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14623 && GET_CODE (x
) == PLUS
14624 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14627 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14633 /* A DImode comparison against zero can be implemented by
14634 or'ing the two halves together. */
14635 if (y
== const0_rtx
)
14638 /* We can do an equality test in three Thumb instructions. */
14648 /* DImode unsigned comparisons can be implemented by cmp +
14649 cmpeq without a scratch register. Not worth doing in
14660 /* DImode signed and unsigned comparisons can be implemented
14661 by cmp + sbcs with a scratch register, but that does not
14662 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14663 gcc_assert (op
!= EQ
&& op
!= NE
);
14667 gcc_unreachable ();
14671 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14672 return GET_MODE (x
);
14677 /* X and Y are two things to compare using CODE. Emit the compare insn and
14678 return the rtx for register 0 in the proper mode. FP means this is a
14679 floating point compare: I don't think that it is needed on the arm. */
14681 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14685 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14687 /* We might have X as a constant, Y as a register because of the predicates
14688 used for cmpdi. If so, force X to a register here. */
14689 if (dimode_comparison
&& !REG_P (x
))
14690 x
= force_reg (DImode
, x
);
14692 mode
= SELECT_CC_MODE (code
, x
, y
);
14693 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14695 if (dimode_comparison
14696 && mode
!= CC_CZmode
)
14700 /* To compare two non-zero values for equality, XOR them and
14701 then compare against zero. Not used for ARM mode; there
14702 CC_CZmode is cheaper. */
14703 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14705 gcc_assert (!reload_completed
);
14706 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14710 /* A scratch register is required. */
14711 if (reload_completed
)
14712 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14714 scratch
= gen_rtx_SCRATCH (SImode
);
14716 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14717 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14718 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
14721 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14726 /* Generate a sequence of insns that will generate the correct return
14727 address mask depending on the physical architecture that the program
14730 arm_gen_return_addr_mask (void)
14732 rtx reg
= gen_reg_rtx (Pmode
);
14734 emit_insn (gen_return_addr_mask (reg
));
14739 arm_reload_in_hi (rtx
*operands
)
14741 rtx ref
= operands
[1];
14743 HOST_WIDE_INT offset
= 0;
14745 if (GET_CODE (ref
) == SUBREG
)
14747 offset
= SUBREG_BYTE (ref
);
14748 ref
= SUBREG_REG (ref
);
14753 /* We have a pseudo which has been spilt onto the stack; there
14754 are two cases here: the first where there is a simple
14755 stack-slot replacement and a second where the stack-slot is
14756 out of range, or is used as a subreg. */
14757 if (reg_equiv_mem (REGNO (ref
)))
14759 ref
= reg_equiv_mem (REGNO (ref
));
14760 base
= find_replacement (&XEXP (ref
, 0));
14763 /* The slot is out of range, or was dressed up in a SUBREG. */
14764 base
= reg_equiv_address (REGNO (ref
));
14766 /* PR 62554: If there is no equivalent memory location then just move
14767 the value as an SImode register move. This happens when the target
14768 architecture variant does not have an HImode register move. */
14771 gcc_assert (REG_P (operands
[0]));
14772 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14773 gen_rtx_SUBREG (SImode
, ref
, 0)));
14778 base
= find_replacement (&XEXP (ref
, 0));
14780 /* Handle the case where the address is too complex to be offset by 1. */
14781 if (GET_CODE (base
) == MINUS
14782 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14784 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14786 emit_set_insn (base_plus
, base
);
14789 else if (GET_CODE (base
) == PLUS
)
14791 /* The addend must be CONST_INT, or we would have dealt with it above. */
14792 HOST_WIDE_INT hi
, lo
;
14794 offset
+= INTVAL (XEXP (base
, 1));
14795 base
= XEXP (base
, 0);
14797 /* Rework the address into a legal sequence of insns. */
14798 /* Valid range for lo is -4095 -> 4095 */
14801 : -((-offset
) & 0xfff));
14803 /* Corner case, if lo is the max offset then we would be out of range
14804 once we have added the additional 1 below, so bump the msb into the
14805 pre-loading insn(s). */
14809 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14810 ^ (HOST_WIDE_INT
) 0x80000000)
14811 - (HOST_WIDE_INT
) 0x80000000);
14813 gcc_assert (hi
+ lo
== offset
);
14817 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14819 /* Get the base address; addsi3 knows how to handle constants
14820 that require more than one insn. */
14821 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14827 /* Operands[2] may overlap operands[0] (though it won't overlap
14828 operands[1]), that's why we asked for a DImode reg -- so we can
14829 use the bit that does not overlap. */
14830 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14831 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14833 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14835 emit_insn (gen_zero_extendqisi2 (scratch
,
14836 gen_rtx_MEM (QImode
,
14837 plus_constant (Pmode
, base
,
14839 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14840 gen_rtx_MEM (QImode
,
14841 plus_constant (Pmode
, base
,
14843 if (!BYTES_BIG_ENDIAN
)
14844 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14845 gen_rtx_IOR (SImode
,
14848 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14852 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14853 gen_rtx_IOR (SImode
,
14854 gen_rtx_ASHIFT (SImode
, scratch
,
14856 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14859 /* Handle storing a half-word to memory during reload by synthesizing as two
14860 byte stores. Take care not to clobber the input values until after we
14861 have moved them somewhere safe. This code assumes that if the DImode
14862 scratch in operands[2] overlaps either the input value or output address
14863 in some way, then that value must die in this insn (we absolutely need
14864 two scratch registers for some corner cases). */
14866 arm_reload_out_hi (rtx
*operands
)
14868 rtx ref
= operands
[0];
14869 rtx outval
= operands
[1];
14871 HOST_WIDE_INT offset
= 0;
14873 if (GET_CODE (ref
) == SUBREG
)
14875 offset
= SUBREG_BYTE (ref
);
14876 ref
= SUBREG_REG (ref
);
14881 /* We have a pseudo which has been spilt onto the stack; there
14882 are two cases here: the first where there is a simple
14883 stack-slot replacement and a second where the stack-slot is
14884 out of range, or is used as a subreg. */
14885 if (reg_equiv_mem (REGNO (ref
)))
14887 ref
= reg_equiv_mem (REGNO (ref
));
14888 base
= find_replacement (&XEXP (ref
, 0));
14891 /* The slot is out of range, or was dressed up in a SUBREG. */
14892 base
= reg_equiv_address (REGNO (ref
));
14894 /* PR 62254: If there is no equivalent memory location then just move
14895 the value as an SImode register move. This happens when the target
14896 architecture variant does not have an HImode register move. */
14899 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
14901 if (REG_P (outval
))
14903 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14904 gen_rtx_SUBREG (SImode
, outval
, 0)));
14906 else /* SUBREG_P (outval) */
14908 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
14909 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14910 SUBREG_REG (outval
)));
14912 /* FIXME: Handle other cases ? */
14913 gcc_unreachable ();
14919 base
= find_replacement (&XEXP (ref
, 0));
14921 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14923 /* Handle the case where the address is too complex to be offset by 1. */
14924 if (GET_CODE (base
) == MINUS
14925 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14927 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14929 /* Be careful not to destroy OUTVAL. */
14930 if (reg_overlap_mentioned_p (base_plus
, outval
))
14932 /* Updating base_plus might destroy outval, see if we can
14933 swap the scratch and base_plus. */
14934 if (!reg_overlap_mentioned_p (scratch
, outval
))
14935 std::swap (scratch
, base_plus
);
14938 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14940 /* Be conservative and copy OUTVAL into the scratch now,
14941 this should only be necessary if outval is a subreg
14942 of something larger than a word. */
14943 /* XXX Might this clobber base? I can't see how it can,
14944 since scratch is known to overlap with OUTVAL, and
14945 must be wider than a word. */
14946 emit_insn (gen_movhi (scratch_hi
, outval
));
14947 outval
= scratch_hi
;
14951 emit_set_insn (base_plus
, base
);
14954 else if (GET_CODE (base
) == PLUS
)
14956 /* The addend must be CONST_INT, or we would have dealt with it above. */
14957 HOST_WIDE_INT hi
, lo
;
14959 offset
+= INTVAL (XEXP (base
, 1));
14960 base
= XEXP (base
, 0);
14962 /* Rework the address into a legal sequence of insns. */
14963 /* Valid range for lo is -4095 -> 4095 */
14966 : -((-offset
) & 0xfff));
14968 /* Corner case, if lo is the max offset then we would be out of range
14969 once we have added the additional 1 below, so bump the msb into the
14970 pre-loading insn(s). */
14974 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14975 ^ (HOST_WIDE_INT
) 0x80000000)
14976 - (HOST_WIDE_INT
) 0x80000000);
14978 gcc_assert (hi
+ lo
== offset
);
14982 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14984 /* Be careful not to destroy OUTVAL. */
14985 if (reg_overlap_mentioned_p (base_plus
, outval
))
14987 /* Updating base_plus might destroy outval, see if we
14988 can swap the scratch and base_plus. */
14989 if (!reg_overlap_mentioned_p (scratch
, outval
))
14990 std::swap (scratch
, base_plus
);
14993 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14995 /* Be conservative and copy outval into scratch now,
14996 this should only be necessary if outval is a
14997 subreg of something larger than a word. */
14998 /* XXX Might this clobber base? I can't see how it
14999 can, since scratch is known to overlap with
15001 emit_insn (gen_movhi (scratch_hi
, outval
));
15002 outval
= scratch_hi
;
15006 /* Get the base address; addsi3 knows how to handle constants
15007 that require more than one insn. */
15008 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15014 if (BYTES_BIG_ENDIAN
)
15016 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15017 plus_constant (Pmode
, base
,
15019 gen_lowpart (QImode
, outval
)));
15020 emit_insn (gen_lshrsi3 (scratch
,
15021 gen_rtx_SUBREG (SImode
, outval
, 0),
15023 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15025 gen_lowpart (QImode
, scratch
)));
15029 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15031 gen_lowpart (QImode
, outval
)));
15032 emit_insn (gen_lshrsi3 (scratch
,
15033 gen_rtx_SUBREG (SImode
, outval
, 0),
15035 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15036 plus_constant (Pmode
, base
,
15038 gen_lowpart (QImode
, scratch
)));
15042 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15043 (padded to the size of a word) should be passed in a register. */
15046 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15048 if (TARGET_AAPCS_BASED
)
15049 return must_pass_in_stack_var_size (mode
, type
);
15051 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15055 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15056 Return true if an argument passed on the stack should be padded upwards,
15057 i.e. if the least-significant byte has useful data.
15058 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15059 aggregate types are placed in the lowest memory address. */
15062 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15064 if (!TARGET_AAPCS_BASED
)
15065 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15067 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15074 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15075 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15076 register has useful data, and return the opposite if the most
15077 significant byte does. */
15080 arm_pad_reg_upward (machine_mode mode
,
15081 tree type
, int first ATTRIBUTE_UNUSED
)
15083 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15085 /* For AAPCS, small aggregates, small fixed-point types,
15086 and small complex types are always padded upwards. */
15089 if ((AGGREGATE_TYPE_P (type
)
15090 || TREE_CODE (type
) == COMPLEX_TYPE
15091 || FIXED_POINT_TYPE_P (type
))
15092 && int_size_in_bytes (type
) <= 4)
15097 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15098 && GET_MODE_SIZE (mode
) <= 4)
15103 /* Otherwise, use default padding. */
15104 return !BYTES_BIG_ENDIAN
;
15107 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15108 assuming that the address in the base register is word aligned. */
15110 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15112 HOST_WIDE_INT max_offset
;
15114 /* Offset must be a multiple of 4 in Thumb mode. */
15115 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15120 else if (TARGET_ARM
)
15125 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15128 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15129 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15130 Assumes that the address in the base register RN is word aligned. Pattern
15131 guarantees that both memory accesses use the same base register,
15132 the offsets are constants within the range, and the gap between the offsets is 4.
15133 If preload complete then check that registers are legal. WBACK indicates whether
15134 address is updated. LOAD indicates whether memory access is load or store. */
15136 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15137 bool wback
, bool load
)
15139 unsigned int t
, t2
, n
;
15141 if (!reload_completed
)
15144 if (!offset_ok_for_ldrd_strd (offset
))
15151 if ((TARGET_THUMB2
)
15152 && ((wback
&& (n
== t
|| n
== t2
))
15153 || (t
== SP_REGNUM
)
15154 || (t
== PC_REGNUM
)
15155 || (t2
== SP_REGNUM
)
15156 || (t2
== PC_REGNUM
)
15157 || (!load
&& (n
== PC_REGNUM
))
15158 || (load
&& (t
== t2
))
15159 /* Triggers Cortex-M3 LDRD errata. */
15160 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15164 && ((wback
&& (n
== t
|| n
== t2
))
15165 || (t2
== PC_REGNUM
)
15166 || (t
% 2 != 0) /* First destination register is not even. */
15168 /* PC can be used as base register (for offset addressing only),
15169 but it is depricated. */
15170 || (n
== PC_REGNUM
)))
15176 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15177 operand MEM's address contains an immediate offset from the base
15178 register and has no side effects, in which case it sets BASE and
15179 OFFSET accordingly. */
15181 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15185 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15187 /* TODO: Handle more general memory operand patterns, such as
15188 PRE_DEC and PRE_INC. */
15190 if (side_effects_p (mem
))
15193 /* Can't deal with subregs. */
15194 if (GET_CODE (mem
) == SUBREG
)
15197 gcc_assert (MEM_P (mem
));
15199 *offset
= const0_rtx
;
15201 addr
= XEXP (mem
, 0);
15203 /* If addr isn't valid for DImode, then we can't handle it. */
15204 if (!arm_legitimate_address_p (DImode
, addr
,
15205 reload_in_progress
|| reload_completed
))
15213 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15215 *base
= XEXP (addr
, 0);
15216 *offset
= XEXP (addr
, 1);
15217 return (REG_P (*base
) && CONST_INT_P (*offset
));
15223 /* Called from a peephole2 to replace two word-size accesses with a
15224 single LDRD/STRD instruction. Returns true iff we can generate a
15225 new instruction sequence. That is, both accesses use the same base
15226 register and the gap between constant offsets is 4. This function
15227 may reorder its operands to match ldrd/strd RTL templates.
15228 OPERANDS are the operands found by the peephole matcher;
15229 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15230 corresponding memory operands. LOAD indicaates whether the access
15231 is load or store. CONST_STORE indicates a store of constant
15232 integer values held in OPERANDS[4,5] and assumes that the pattern
15233 is of length 4 insn, for the purpose of checking dead registers.
15234 COMMUTE indicates that register operands may be reordered. */
15236 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15237 bool const_store
, bool commute
)
15240 HOST_WIDE_INT offsets
[2], offset
;
15241 rtx base
= NULL_RTX
;
15242 rtx cur_base
, cur_offset
, tmp
;
15244 HARD_REG_SET regset
;
15246 gcc_assert (!const_store
|| !load
);
15247 /* Check that the memory references are immediate offsets from the
15248 same base register. Extract the base register, the destination
15249 registers, and the corresponding memory offsets. */
15250 for (i
= 0; i
< nops
; i
++)
15252 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15257 else if (REGNO (base
) != REGNO (cur_base
))
15260 offsets
[i
] = INTVAL (cur_offset
);
15261 if (GET_CODE (operands
[i
]) == SUBREG
)
15263 tmp
= SUBREG_REG (operands
[i
]);
15264 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15269 /* Make sure there is no dependency between the individual loads. */
15270 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15271 return false; /* RAW */
15273 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15274 return false; /* WAW */
15276 /* If the same input register is used in both stores
15277 when storing different constants, try to find a free register.
15278 For example, the code
15283 can be transformed into
15287 in Thumb mode assuming that r1 is free.
15288 For ARM mode do the same but only if the starting register
15289 can be made to be even. */
15291 && REGNO (operands
[0]) == REGNO (operands
[1])
15292 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15296 CLEAR_HARD_REG_SET (regset
);
15297 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15298 if (tmp
== NULL_RTX
)
15301 /* Use the new register in the first load to ensure that
15302 if the original input register is not dead after peephole,
15303 then it will have the correct constant value. */
15306 else if (TARGET_ARM
)
15308 int regno
= REGNO (operands
[0]);
15309 if (!peep2_reg_dead_p (4, operands
[0]))
15311 /* When the input register is even and is not dead after the
15312 pattern, it has to hold the second constant but we cannot
15313 form a legal STRD in ARM mode with this register as the second
15315 if (regno
% 2 == 0)
15318 /* Is regno-1 free? */
15319 SET_HARD_REG_SET (regset
);
15320 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15321 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15322 if (tmp
== NULL_RTX
)
15329 /* Find a DImode register. */
15330 CLEAR_HARD_REG_SET (regset
);
15331 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15332 if (tmp
!= NULL_RTX
)
15334 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15335 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15339 /* Can we use the input register to form a DI register? */
15340 SET_HARD_REG_SET (regset
);
15341 CLEAR_HARD_REG_BIT(regset
,
15342 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15343 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15344 if (tmp
== NULL_RTX
)
15346 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15350 gcc_assert (operands
[0] != NULL_RTX
);
15351 gcc_assert (operands
[1] != NULL_RTX
);
15352 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15353 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15357 /* Make sure the instructions are ordered with lower memory access first. */
15358 if (offsets
[0] > offsets
[1])
15360 gap
= offsets
[0] - offsets
[1];
15361 offset
= offsets
[1];
15363 /* Swap the instructions such that lower memory is accessed first. */
15364 std::swap (operands
[0], operands
[1]);
15365 std::swap (operands
[2], operands
[3]);
15367 std::swap (operands
[4], operands
[5]);
15371 gap
= offsets
[1] - offsets
[0];
15372 offset
= offsets
[0];
15375 /* Make sure accesses are to consecutive memory locations. */
15379 /* Make sure we generate legal instructions. */
15380 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15384 /* In Thumb state, where registers are almost unconstrained, there
15385 is little hope to fix it. */
15389 if (load
&& commute
)
15391 /* Try reordering registers. */
15392 std::swap (operands
[0], operands
[1]);
15393 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15400 /* If input registers are dead after this pattern, they can be
15401 reordered or replaced by other registers that are free in the
15402 current pattern. */
15403 if (!peep2_reg_dead_p (4, operands
[0])
15404 || !peep2_reg_dead_p (4, operands
[1]))
15407 /* Try to reorder the input registers. */
15408 /* For example, the code
15413 can be transformed into
15418 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15421 std::swap (operands
[0], operands
[1]);
15425 /* Try to find a free DI register. */
15426 CLEAR_HARD_REG_SET (regset
);
15427 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15428 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15431 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15432 if (tmp
== NULL_RTX
)
15435 /* DREG must be an even-numbered register in DImode.
15436 Split it into SI registers. */
15437 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15438 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15439 gcc_assert (operands
[0] != NULL_RTX
);
15440 gcc_assert (operands
[1] != NULL_RTX
);
15441 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15442 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15444 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15456 /* Print a symbolic form of X to the debug file, F. */
15458 arm_print_value (FILE *f
, rtx x
)
15460 switch (GET_CODE (x
))
15463 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15467 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15475 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15477 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15478 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15486 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15490 fprintf (f
, "`%s'", XSTR (x
, 0));
15494 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15498 arm_print_value (f
, XEXP (x
, 0));
15502 arm_print_value (f
, XEXP (x
, 0));
15504 arm_print_value (f
, XEXP (x
, 1));
15512 fprintf (f
, "????");
15517 /* Routines for manipulation of the constant pool. */
15519 /* Arm instructions cannot load a large constant directly into a
15520 register; they have to come from a pc relative load. The constant
15521 must therefore be placed in the addressable range of the pc
15522 relative load. Depending on the precise pc relative load
15523 instruction the range is somewhere between 256 bytes and 4k. This
15524 means that we often have to dump a constant inside a function, and
15525 generate code to branch around it.
15527 It is important to minimize this, since the branches will slow
15528 things down and make the code larger.
15530 Normally we can hide the table after an existing unconditional
15531 branch so that there is no interruption of the flow, but in the
15532 worst case the code looks like this:
15550 We fix this by performing a scan after scheduling, which notices
15551 which instructions need to have their operands fetched from the
15552 constant table and builds the table.
15554 The algorithm starts by building a table of all the constants that
15555 need fixing up and all the natural barriers in the function (places
15556 where a constant table can be dropped without breaking the flow).
15557 For each fixup we note how far the pc-relative replacement will be
15558 able to reach and the offset of the instruction into the function.
15560 Having built the table we then group the fixes together to form
15561 tables that are as large as possible (subject to addressing
15562 constraints) and emit each table of constants after the last
15563 barrier that is within range of all the instructions in the group.
15564 If a group does not contain a barrier, then we forcibly create one
15565 by inserting a jump instruction into the flow. Once the table has
15566 been inserted, the insns are then modified to reference the
15567 relevant entry in the pool.
15569 Possible enhancements to the algorithm (not implemented) are:
15571 1) For some processors and object formats, there may be benefit in
15572 aligning the pools to the start of cache lines; this alignment
15573 would need to be taken into account when calculating addressability
15576 /* These typedefs are located at the start of this file, so that
15577 they can be used in the prototypes there. This comment is to
15578 remind readers of that fact so that the following structures
15579 can be understood more easily.
15581 typedef struct minipool_node Mnode;
15582 typedef struct minipool_fixup Mfix; */
15584 struct minipool_node
15586 /* Doubly linked chain of entries. */
15589 /* The maximum offset into the code that this entry can be placed. While
15590 pushing fixes for forward references, all entries are sorted in order
15591 of increasing max_address. */
15592 HOST_WIDE_INT max_address
;
15593 /* Similarly for an entry inserted for a backwards ref. */
15594 HOST_WIDE_INT min_address
;
15595 /* The number of fixes referencing this entry. This can become zero
15596 if we "unpush" an entry. In this case we ignore the entry when we
15597 come to emit the code. */
15599 /* The offset from the start of the minipool. */
15600 HOST_WIDE_INT offset
;
15601 /* The value in table. */
15603 /* The mode of value. */
15605 /* The size of the value. With iWMMXt enabled
15606 sizes > 4 also imply an alignment of 8-bytes. */
15610 struct minipool_fixup
15614 HOST_WIDE_INT address
;
15620 HOST_WIDE_INT forwards
;
15621 HOST_WIDE_INT backwards
;
15624 /* Fixes less than a word need padding out to a word boundary. */
15625 #define MINIPOOL_FIX_SIZE(mode) \
15626 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15628 static Mnode
* minipool_vector_head
;
15629 static Mnode
* minipool_vector_tail
;
15630 static rtx_code_label
*minipool_vector_label
;
15631 static int minipool_pad
;
15633 /* The linked list of all minipool fixes required for this function. */
15634 Mfix
* minipool_fix_head
;
15635 Mfix
* minipool_fix_tail
;
15636 /* The fix entry for the current minipool, once it has been placed. */
15637 Mfix
* minipool_barrier
;
15639 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15640 #define JUMP_TABLES_IN_TEXT_SECTION 0
15643 static HOST_WIDE_INT
15644 get_jump_table_size (rtx_jump_table_data
*insn
)
15646 /* ADDR_VECs only take room if read-only data does into the text
15648 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15650 rtx body
= PATTERN (insn
);
15651 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15652 HOST_WIDE_INT size
;
15653 HOST_WIDE_INT modesize
;
15655 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15656 size
= modesize
* XVECLEN (body
, elt
);
15660 /* Round up size of TBB table to a halfword boundary. */
15661 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
15664 /* No padding necessary for TBH. */
15667 /* Add two bytes for alignment on Thumb. */
15672 gcc_unreachable ();
15680 /* Return the maximum amount of padding that will be inserted before
15683 static HOST_WIDE_INT
15684 get_label_padding (rtx label
)
15686 HOST_WIDE_INT align
, min_insn_size
;
15688 align
= 1 << label_to_alignment (label
);
15689 min_insn_size
= TARGET_THUMB
? 2 : 4;
15690 return align
> min_insn_size
? align
- min_insn_size
: 0;
15693 /* Move a minipool fix MP from its current location to before MAX_MP.
15694 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15695 constraints may need updating. */
15697 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15698 HOST_WIDE_INT max_address
)
15700 /* The code below assumes these are different. */
15701 gcc_assert (mp
!= max_mp
);
15703 if (max_mp
== NULL
)
15705 if (max_address
< mp
->max_address
)
15706 mp
->max_address
= max_address
;
15710 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15711 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15713 mp
->max_address
= max_address
;
15715 /* Unlink MP from its current position. Since max_mp is non-null,
15716 mp->prev must be non-null. */
15717 mp
->prev
->next
= mp
->next
;
15718 if (mp
->next
!= NULL
)
15719 mp
->next
->prev
= mp
->prev
;
15721 minipool_vector_tail
= mp
->prev
;
15723 /* Re-insert it before MAX_MP. */
15725 mp
->prev
= max_mp
->prev
;
15728 if (mp
->prev
!= NULL
)
15729 mp
->prev
->next
= mp
;
15731 minipool_vector_head
= mp
;
15734 /* Save the new entry. */
15737 /* Scan over the preceding entries and adjust their addresses as
15739 while (mp
->prev
!= NULL
15740 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15742 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15749 /* Add a constant to the minipool for a forward reference. Returns the
15750 node added or NULL if the constant will not fit in this pool. */
15752 add_minipool_forward_ref (Mfix
*fix
)
15754 /* If set, max_mp is the first pool_entry that has a lower
15755 constraint than the one we are trying to add. */
15756 Mnode
* max_mp
= NULL
;
15757 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15760 /* If the minipool starts before the end of FIX->INSN then this FIX
15761 can not be placed into the current pool. Furthermore, adding the
15762 new constant pool entry may cause the pool to start FIX_SIZE bytes
15764 if (minipool_vector_head
&&
15765 (fix
->address
+ get_attr_length (fix
->insn
)
15766 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15769 /* Scan the pool to see if a constant with the same value has
15770 already been added. While we are doing this, also note the
15771 location where we must insert the constant if it doesn't already
15773 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15775 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15776 && fix
->mode
== mp
->mode
15777 && (!LABEL_P (fix
->value
)
15778 || (CODE_LABEL_NUMBER (fix
->value
)
15779 == CODE_LABEL_NUMBER (mp
->value
)))
15780 && rtx_equal_p (fix
->value
, mp
->value
))
15782 /* More than one fix references this entry. */
15784 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15787 /* Note the insertion point if necessary. */
15789 && mp
->max_address
> max_address
)
15792 /* If we are inserting an 8-bytes aligned quantity and
15793 we have not already found an insertion point, then
15794 make sure that all such 8-byte aligned quantities are
15795 placed at the start of the pool. */
15796 if (ARM_DOUBLEWORD_ALIGN
15798 && fix
->fix_size
>= 8
15799 && mp
->fix_size
< 8)
15802 max_address
= mp
->max_address
;
15806 /* The value is not currently in the minipool, so we need to create
15807 a new entry for it. If MAX_MP is NULL, the entry will be put on
15808 the end of the list since the placement is less constrained than
15809 any existing entry. Otherwise, we insert the new fix before
15810 MAX_MP and, if necessary, adjust the constraints on the other
15813 mp
->fix_size
= fix
->fix_size
;
15814 mp
->mode
= fix
->mode
;
15815 mp
->value
= fix
->value
;
15817 /* Not yet required for a backwards ref. */
15818 mp
->min_address
= -65536;
15820 if (max_mp
== NULL
)
15822 mp
->max_address
= max_address
;
15824 mp
->prev
= minipool_vector_tail
;
15826 if (mp
->prev
== NULL
)
15828 minipool_vector_head
= mp
;
15829 minipool_vector_label
= gen_label_rtx ();
15832 mp
->prev
->next
= mp
;
15834 minipool_vector_tail
= mp
;
15838 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15839 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15841 mp
->max_address
= max_address
;
15844 mp
->prev
= max_mp
->prev
;
15846 if (mp
->prev
!= NULL
)
15847 mp
->prev
->next
= mp
;
15849 minipool_vector_head
= mp
;
15852 /* Save the new entry. */
15855 /* Scan over the preceding entries and adjust their addresses as
15857 while (mp
->prev
!= NULL
15858 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15860 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15868 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15869 HOST_WIDE_INT min_address
)
15871 HOST_WIDE_INT offset
;
15873 /* The code below assumes these are different. */
15874 gcc_assert (mp
!= min_mp
);
15876 if (min_mp
== NULL
)
15878 if (min_address
> mp
->min_address
)
15879 mp
->min_address
= min_address
;
15883 /* We will adjust this below if it is too loose. */
15884 mp
->min_address
= min_address
;
15886 /* Unlink MP from its current position. Since min_mp is non-null,
15887 mp->next must be non-null. */
15888 mp
->next
->prev
= mp
->prev
;
15889 if (mp
->prev
!= NULL
)
15890 mp
->prev
->next
= mp
->next
;
15892 minipool_vector_head
= mp
->next
;
15894 /* Reinsert it after MIN_MP. */
15896 mp
->next
= min_mp
->next
;
15898 if (mp
->next
!= NULL
)
15899 mp
->next
->prev
= mp
;
15901 minipool_vector_tail
= mp
;
15907 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15909 mp
->offset
= offset
;
15910 if (mp
->refcount
> 0)
15911 offset
+= mp
->fix_size
;
15913 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
15914 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
15920 /* Add a constant to the minipool for a backward reference. Returns the
15921 node added or NULL if the constant will not fit in this pool.
15923 Note that the code for insertion for a backwards reference can be
15924 somewhat confusing because the calculated offsets for each fix do
15925 not take into account the size of the pool (which is still under
15928 add_minipool_backward_ref (Mfix
*fix
)
15930 /* If set, min_mp is the last pool_entry that has a lower constraint
15931 than the one we are trying to add. */
15932 Mnode
*min_mp
= NULL
;
15933 /* This can be negative, since it is only a constraint. */
15934 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
15937 /* If we can't reach the current pool from this insn, or if we can't
15938 insert this entry at the end of the pool without pushing other
15939 fixes out of range, then we don't try. This ensures that we
15940 can't fail later on. */
15941 if (min_address
>= minipool_barrier
->address
15942 || (minipool_vector_tail
->min_address
+ fix
->fix_size
15943 >= minipool_barrier
->address
))
15946 /* Scan the pool to see if a constant with the same value has
15947 already been added. While we are doing this, also note the
15948 location where we must insert the constant if it doesn't already
15950 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
15952 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15953 && fix
->mode
== mp
->mode
15954 && (!LABEL_P (fix
->value
)
15955 || (CODE_LABEL_NUMBER (fix
->value
)
15956 == CODE_LABEL_NUMBER (mp
->value
)))
15957 && rtx_equal_p (fix
->value
, mp
->value
)
15958 /* Check that there is enough slack to move this entry to the
15959 end of the table (this is conservative). */
15960 && (mp
->max_address
15961 > (minipool_barrier
->address
15962 + minipool_vector_tail
->offset
15963 + minipool_vector_tail
->fix_size
)))
15966 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
15969 if (min_mp
!= NULL
)
15970 mp
->min_address
+= fix
->fix_size
;
15973 /* Note the insertion point if necessary. */
15974 if (mp
->min_address
< min_address
)
15976 /* For now, we do not allow the insertion of 8-byte alignment
15977 requiring nodes anywhere but at the start of the pool. */
15978 if (ARM_DOUBLEWORD_ALIGN
15979 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
15984 else if (mp
->max_address
15985 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
15987 /* Inserting before this entry would push the fix beyond
15988 its maximum address (which can happen if we have
15989 re-located a forwards fix); force the new fix to come
15991 if (ARM_DOUBLEWORD_ALIGN
15992 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
15997 min_address
= mp
->min_address
+ fix
->fix_size
;
16000 /* Do not insert a non-8-byte aligned quantity before 8-byte
16001 aligned quantities. */
16002 else if (ARM_DOUBLEWORD_ALIGN
16003 && fix
->fix_size
< 8
16004 && mp
->fix_size
>= 8)
16007 min_address
= mp
->min_address
+ fix
->fix_size
;
16012 /* We need to create a new entry. */
16014 mp
->fix_size
= fix
->fix_size
;
16015 mp
->mode
= fix
->mode
;
16016 mp
->value
= fix
->value
;
16018 mp
->max_address
= minipool_barrier
->address
+ 65536;
16020 mp
->min_address
= min_address
;
16022 if (min_mp
== NULL
)
16025 mp
->next
= minipool_vector_head
;
16027 if (mp
->next
== NULL
)
16029 minipool_vector_tail
= mp
;
16030 minipool_vector_label
= gen_label_rtx ();
16033 mp
->next
->prev
= mp
;
16035 minipool_vector_head
= mp
;
16039 mp
->next
= min_mp
->next
;
16043 if (mp
->next
!= NULL
)
16044 mp
->next
->prev
= mp
;
16046 minipool_vector_tail
= mp
;
16049 /* Save the new entry. */
16057 /* Scan over the following entries and adjust their offsets. */
16058 while (mp
->next
!= NULL
)
16060 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16061 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16064 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16066 mp
->next
->offset
= mp
->offset
;
16075 assign_minipool_offsets (Mfix
*barrier
)
16077 HOST_WIDE_INT offset
= 0;
16080 minipool_barrier
= barrier
;
16082 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16084 mp
->offset
= offset
;
16086 if (mp
->refcount
> 0)
16087 offset
+= mp
->fix_size
;
16091 /* Output the literal table */
16093 dump_minipool (rtx_insn
*scan
)
16099 if (ARM_DOUBLEWORD_ALIGN
)
16100 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16101 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16108 fprintf (dump_file
,
16109 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16110 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16112 scan
= emit_label_after (gen_label_rtx (), scan
);
16113 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16114 scan
= emit_label_after (minipool_vector_label
, scan
);
16116 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16118 if (mp
->refcount
> 0)
16122 fprintf (dump_file
,
16123 ";; Offset %u, min %ld, max %ld ",
16124 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16125 (unsigned long) mp
->max_address
);
16126 arm_print_value (dump_file
, mp
->value
);
16127 fputc ('\n', dump_file
);
16130 rtx val
= copy_rtx (mp
->value
);
16132 switch (GET_MODE_SIZE (mp
->mode
))
16134 #ifdef HAVE_consttable_1
16136 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
16140 #ifdef HAVE_consttable_2
16142 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
16146 #ifdef HAVE_consttable_4
16148 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
16152 #ifdef HAVE_consttable_8
16154 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
16158 #ifdef HAVE_consttable_16
16160 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
16165 gcc_unreachable ();
16173 minipool_vector_head
= minipool_vector_tail
= NULL
;
16174 scan
= emit_insn_after (gen_consttable_end (), scan
);
16175 scan
= emit_barrier_after (scan
);
16178 /* Return the cost of forcibly inserting a barrier after INSN. */
16180 arm_barrier_cost (rtx_insn
*insn
)
16182 /* Basing the location of the pool on the loop depth is preferable,
16183 but at the moment, the basic block information seems to be
16184 corrupt by this stage of the compilation. */
16185 int base_cost
= 50;
16186 rtx_insn
*next
= next_nonnote_insn (insn
);
16188 if (next
!= NULL
&& LABEL_P (next
))
16191 switch (GET_CODE (insn
))
16194 /* It will always be better to place the table before the label, rather
16203 return base_cost
- 10;
16206 return base_cost
+ 10;
16210 /* Find the best place in the insn stream in the range
16211 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16212 Create the barrier by inserting a jump and add a new fix entry for
16215 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16217 HOST_WIDE_INT count
= 0;
16218 rtx_barrier
*barrier
;
16219 rtx_insn
*from
= fix
->insn
;
16220 /* The instruction after which we will insert the jump. */
16221 rtx_insn
*selected
= NULL
;
16223 /* The address at which the jump instruction will be placed. */
16224 HOST_WIDE_INT selected_address
;
16226 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16227 rtx_code_label
*label
= gen_label_rtx ();
16229 selected_cost
= arm_barrier_cost (from
);
16230 selected_address
= fix
->address
;
16232 while (from
&& count
< max_count
)
16234 rtx_jump_table_data
*tmp
;
16237 /* This code shouldn't have been called if there was a natural barrier
16239 gcc_assert (!BARRIER_P (from
));
16241 /* Count the length of this insn. This must stay in sync with the
16242 code that pushes minipool fixes. */
16243 if (LABEL_P (from
))
16244 count
+= get_label_padding (from
);
16246 count
+= get_attr_length (from
);
16248 /* If there is a jump table, add its length. */
16249 if (tablejump_p (from
, NULL
, &tmp
))
16251 count
+= get_jump_table_size (tmp
);
16253 /* Jump tables aren't in a basic block, so base the cost on
16254 the dispatch insn. If we select this location, we will
16255 still put the pool after the table. */
16256 new_cost
= arm_barrier_cost (from
);
16258 if (count
< max_count
16259 && (!selected
|| new_cost
<= selected_cost
))
16262 selected_cost
= new_cost
;
16263 selected_address
= fix
->address
+ count
;
16266 /* Continue after the dispatch table. */
16267 from
= NEXT_INSN (tmp
);
16271 new_cost
= arm_barrier_cost (from
);
16273 if (count
< max_count
16274 && (!selected
|| new_cost
<= selected_cost
))
16277 selected_cost
= new_cost
;
16278 selected_address
= fix
->address
+ count
;
16281 from
= NEXT_INSN (from
);
16284 /* Make sure that we found a place to insert the jump. */
16285 gcc_assert (selected
);
16287 /* Make sure we do not split a call and its corresponding
16288 CALL_ARG_LOCATION note. */
16289 if (CALL_P (selected
))
16291 rtx_insn
*next
= NEXT_INSN (selected
);
16292 if (next
&& NOTE_P (next
)
16293 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16297 /* Create a new JUMP_INSN that branches around a barrier. */
16298 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16299 JUMP_LABEL (from
) = label
;
16300 barrier
= emit_barrier_after (from
);
16301 emit_label_after (label
, barrier
);
16303 /* Create a minipool barrier entry for the new barrier. */
16304 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16305 new_fix
->insn
= barrier
;
16306 new_fix
->address
= selected_address
;
16307 new_fix
->next
= fix
->next
;
16308 fix
->next
= new_fix
;
16313 /* Record that there is a natural barrier in the insn stream at
16316 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16318 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16321 fix
->address
= address
;
16324 if (minipool_fix_head
!= NULL
)
16325 minipool_fix_tail
->next
= fix
;
16327 minipool_fix_head
= fix
;
16329 minipool_fix_tail
= fix
;
16332 /* Record INSN, which will need fixing up to load a value from the
16333 minipool. ADDRESS is the offset of the insn since the start of the
16334 function; LOC is a pointer to the part of the insn which requires
16335 fixing; VALUE is the constant that must be loaded, which is of type
16338 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16339 machine_mode mode
, rtx value
)
16341 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16344 fix
->address
= address
;
16347 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16348 fix
->value
= value
;
16349 fix
->forwards
= get_attr_pool_range (insn
);
16350 fix
->backwards
= get_attr_neg_pool_range (insn
);
16351 fix
->minipool
= NULL
;
16353 /* If an insn doesn't have a range defined for it, then it isn't
16354 expecting to be reworked by this code. Better to stop now than
16355 to generate duff assembly code. */
16356 gcc_assert (fix
->forwards
|| fix
->backwards
);
16358 /* If an entry requires 8-byte alignment then assume all constant pools
16359 require 4 bytes of padding. Trying to do this later on a per-pool
16360 basis is awkward because existing pool entries have to be modified. */
16361 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16366 fprintf (dump_file
,
16367 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16368 GET_MODE_NAME (mode
),
16369 INSN_UID (insn
), (unsigned long) address
,
16370 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16371 arm_print_value (dump_file
, fix
->value
);
16372 fprintf (dump_file
, "\n");
16375 /* Add it to the chain of fixes. */
16378 if (minipool_fix_head
!= NULL
)
16379 minipool_fix_tail
->next
= fix
;
16381 minipool_fix_head
= fix
;
16383 minipool_fix_tail
= fix
;
16386 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16387 Returns the number of insns needed, or 99 if we always want to synthesize
16390 arm_max_const_double_inline_cost ()
16392 /* Let the value get synthesized to avoid the use of literal pools. */
16393 if (arm_disable_literal_pool
)
16396 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16399 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16400 Returns the number of insns needed, or 99 if we don't know how to
16403 arm_const_double_inline_cost (rtx val
)
16405 rtx lowpart
, highpart
;
16408 mode
= GET_MODE (val
);
16410 if (mode
== VOIDmode
)
16413 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16415 lowpart
= gen_lowpart (SImode
, val
);
16416 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16418 gcc_assert (CONST_INT_P (lowpart
));
16419 gcc_assert (CONST_INT_P (highpart
));
16421 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16422 NULL_RTX
, NULL_RTX
, 0, 0)
16423 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16424 NULL_RTX
, NULL_RTX
, 0, 0));
16427 /* Cost of loading a SImode constant. */
16429 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16431 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16432 NULL_RTX
, NULL_RTX
, 1, 0);
16435 /* Return true if it is worthwhile to split a 64-bit constant into two
16436 32-bit operations. This is the case if optimizing for size, or
16437 if we have load delay slots, or if one 32-bit part can be done with
16438 a single data operation. */
16440 arm_const_double_by_parts (rtx val
)
16442 machine_mode mode
= GET_MODE (val
);
16445 if (optimize_size
|| arm_ld_sched
)
16448 if (mode
== VOIDmode
)
16451 part
= gen_highpart_mode (SImode
, mode
, val
);
16453 gcc_assert (CONST_INT_P (part
));
16455 if (const_ok_for_arm (INTVAL (part
))
16456 || const_ok_for_arm (~INTVAL (part
)))
16459 part
= gen_lowpart (SImode
, val
);
16461 gcc_assert (CONST_INT_P (part
));
16463 if (const_ok_for_arm (INTVAL (part
))
16464 || const_ok_for_arm (~INTVAL (part
)))
16470 /* Return true if it is possible to inline both the high and low parts
16471 of a 64-bit constant into 32-bit data processing instructions. */
16473 arm_const_double_by_immediates (rtx val
)
16475 machine_mode mode
= GET_MODE (val
);
16478 if (mode
== VOIDmode
)
16481 part
= gen_highpart_mode (SImode
, mode
, val
);
16483 gcc_assert (CONST_INT_P (part
));
16485 if (!const_ok_for_arm (INTVAL (part
)))
16488 part
= gen_lowpart (SImode
, val
);
16490 gcc_assert (CONST_INT_P (part
));
16492 if (!const_ok_for_arm (INTVAL (part
)))
16498 /* Scan INSN and note any of its operands that need fixing.
16499 If DO_PUSHES is false we do not actually push any of the fixups
16502 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16506 extract_constrain_insn (insn
);
16508 if (recog_data
.n_alternatives
== 0)
16511 /* Fill in recog_op_alt with information about the constraints of
16513 preprocess_constraints (insn
);
16515 const operand_alternative
*op_alt
= which_op_alt ();
16516 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16518 /* Things we need to fix can only occur in inputs. */
16519 if (recog_data
.operand_type
[opno
] != OP_IN
)
16522 /* If this alternative is a memory reference, then any mention
16523 of constants in this alternative is really to fool reload
16524 into allowing us to accept one there. We need to fix them up
16525 now so that we output the right code. */
16526 if (op_alt
[opno
].memory_ok
)
16528 rtx op
= recog_data
.operand
[opno
];
16530 if (CONSTANT_P (op
))
16533 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16534 recog_data
.operand_mode
[opno
], op
);
16536 else if (MEM_P (op
)
16537 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16538 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16542 rtx cop
= avoid_constant_pool_reference (op
);
16544 /* Casting the address of something to a mode narrower
16545 than a word can cause avoid_constant_pool_reference()
16546 to return the pool reference itself. That's no good to
16547 us here. Lets just hope that we can use the
16548 constant pool value directly. */
16550 cop
= get_pool_constant (XEXP (op
, 0));
16552 push_minipool_fix (insn
, address
,
16553 recog_data
.operand_loc
[opno
],
16554 recog_data
.operand_mode
[opno
], cop
);
16564 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16565 and unions in the context of ARMv8-M Security Extensions. It is used as a
16566 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16567 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16568 or four masks, depending on whether it is being computed for a
16569 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16570 respectively. The tree for the type of the argument or a field within an
16571 argument is passed in ARG_TYPE, the current register this argument or field
16572 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16573 argument or field starts at is passed in STARTING_BIT and the last used bit
16574 is kept in LAST_USED_BIT which is also updated accordingly. */
16576 static unsigned HOST_WIDE_INT
16577 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
16578 uint32_t * padding_bits_to_clear
,
16579 unsigned starting_bit
, int * last_used_bit
)
16582 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
16584 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
16586 unsigned current_bit
= starting_bit
;
16588 long int offset
, size
;
16591 field
= TYPE_FIELDS (arg_type
);
16594 /* The offset within a structure is always an offset from
16595 the start of that structure. Make sure we take that into the
16596 calculation of the register based offset that we use here. */
16597 offset
= starting_bit
;
16598 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
16601 /* This is the actual size of the field, for bitfields this is the
16602 bitfield width and not the container size. */
16603 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16605 if (*last_used_bit
!= offset
)
16607 if (offset
< *last_used_bit
)
16609 /* This field's offset is before the 'last_used_bit', that
16610 means this field goes on the next register. So we need to
16611 pad the rest of the current register and increase the
16612 register number. */
16614 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
16617 padding_bits_to_clear
[*regno
] |= mask
;
16618 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16623 /* Otherwise we pad the bits between the last field's end and
16624 the start of the new field. */
16627 mask
= ((uint32_t)-1) >> (32 - offset
);
16628 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
16629 padding_bits_to_clear
[*regno
] |= mask
;
16631 current_bit
= offset
;
16634 /* Calculate further padding bits for inner structs/unions too. */
16635 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
16637 *last_used_bit
= current_bit
;
16638 not_to_clear_reg_mask
16639 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
16640 padding_bits_to_clear
, offset
,
16645 /* Update 'current_bit' with this field's size. If the
16646 'current_bit' lies in a subsequent register, update 'regno' and
16647 reset 'current_bit' to point to the current bit in that new
16649 current_bit
+= size
;
16650 while (current_bit
>= 32)
16653 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16656 *last_used_bit
= current_bit
;
16659 field
= TREE_CHAIN (field
);
16661 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16663 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
16665 tree field
, field_t
;
16666 int i
, regno_t
, field_size
;
16670 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
16671 = {-1, -1, -1, -1};
16673 /* To compute the padding bits in a union we only consider bits as
16674 padding bits if they are always either a padding bit or fall outside a
16675 fields size for all fields in the union. */
16676 field
= TYPE_FIELDS (arg_type
);
16679 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
16680 = {0U, 0U, 0U, 0U};
16681 int last_used_bit_t
= *last_used_bit
;
16683 field_t
= TREE_TYPE (field
);
16685 /* If the field's type is either a record or a union make sure to
16686 compute their padding bits too. */
16687 if (RECORD_OR_UNION_TYPE_P (field_t
))
16688 not_to_clear_reg_mask
16689 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
16690 &padding_bits_to_clear_t
[0],
16691 starting_bit
, &last_used_bit_t
);
16694 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16695 regno_t
= (field_size
/ 32) + *regno
;
16696 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
16699 for (i
= *regno
; i
< regno_t
; i
++)
16701 /* For all but the last register used by this field only keep the
16702 padding bits that were padding bits in this field. */
16703 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
16706 /* For the last register, keep all padding bits that were padding
16707 bits in this field and any padding bits that are still valid
16708 as padding bits but fall outside of this field's size. */
16709 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
16710 padding_bits_to_clear_res
[regno_t
]
16711 &= padding_bits_to_clear_t
[regno_t
] | mask
;
16713 /* Update the maximum size of the fields in terms of registers used
16714 ('max_reg') and the 'last_used_bit' in said register. */
16715 if (max_reg
< regno_t
)
16718 max_bit
= last_used_bit_t
;
16720 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
16721 max_bit
= last_used_bit_t
;
16723 field
= TREE_CHAIN (field
);
16726 /* Update the current padding_bits_to_clear using the intersection of the
16727 padding bits of all the fields. */
16728 for (i
=*regno
; i
< max_reg
; i
++)
16729 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
16731 /* Do not keep trailing padding bits, we do not know yet whether this
16732 is the end of the argument. */
16733 mask
= ((uint32_t) 1 << max_bit
) - 1;
16734 padding_bits_to_clear
[max_reg
]
16735 |= padding_bits_to_clear_res
[max_reg
] & mask
;
16738 *last_used_bit
= max_bit
;
16741 /* This function should only be used for structs and unions. */
16742 gcc_unreachable ();
16744 return not_to_clear_reg_mask
;
16747 /* In the context of ARMv8-M Security Extensions, this function is used for both
16748 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16749 registers are used when returning or passing arguments, which is then
16750 returned as a mask. It will also compute a mask to indicate padding/unused
16751 bits for each of these registers, and passes this through the
16752 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16753 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16754 the starting register used to pass this argument or return value is passed
16755 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16756 for struct and union types. */
16758 static unsigned HOST_WIDE_INT
16759 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
16760 uint32_t * padding_bits_to_clear
)
16763 int last_used_bit
= 0;
16764 unsigned HOST_WIDE_INT not_to_clear_mask
;
16766 if (RECORD_OR_UNION_TYPE_P (arg_type
))
16769 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
16770 padding_bits_to_clear
, 0,
16774 /* If the 'last_used_bit' is not zero, that means we are still using a
16775 part of the last 'regno'. In such cases we must clear the trailing
16776 bits. Otherwise we are not using regno and we should mark it as to
16778 if (last_used_bit
!= 0)
16779 padding_bits_to_clear
[regno
]
16780 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
16782 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
16786 not_to_clear_mask
= 0;
16787 /* We are not dealing with structs nor unions. So these arguments may be
16788 passed in floating point registers too. In some cases a BLKmode is
16789 used when returning or passing arguments in multiple VFP registers. */
16790 if (GET_MODE (arg_rtx
) == BLKmode
)
16795 /* This should really only occur when dealing with the hard-float
16797 gcc_assert (TARGET_HARD_FLOAT_ABI
);
16799 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
16801 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
16802 gcc_assert (REG_P (reg
));
16804 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
16806 /* If we are dealing with DF mode, make sure we don't
16807 clear either of the registers it addresses. */
16808 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
16811 unsigned HOST_WIDE_INT mask
;
16812 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
16813 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
16814 not_to_clear_mask
|= mask
;
16820 /* Otherwise we can rely on the MODE to determine how many registers
16821 are being used by this argument. */
16822 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
16823 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
16826 unsigned HOST_WIDE_INT
16827 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
16828 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
16829 not_to_clear_mask
|= mask
;
16834 return not_to_clear_mask
;
16837 /* Saves callee saved registers, clears callee saved registers and caller saved
16838 registers not used to pass arguments before a cmse_nonsecure_call. And
16839 restores the callee saved registers after. */
16842 cmse_nonsecure_call_clear_caller_saved (void)
16846 FOR_EACH_BB_FN (bb
, cfun
)
16850 FOR_BB_INSNS (bb
, insn
)
16852 uint64_t to_clear_mask
, float_mask
;
16854 rtx pat
, call
, unspec
, reg
, cleared_reg
, tmp
;
16855 unsigned int regno
, maxregno
;
16857 CUMULATIVE_ARGS args_so_far_v
;
16858 cumulative_args_t args_so_far
;
16859 tree arg_type
, fntype
;
16860 bool using_r4
, first_param
= true;
16861 function_args_iterator args_iter
;
16862 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
16863 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
[0];
16865 if (!NONDEBUG_INSN_P (insn
))
16868 if (!CALL_P (insn
))
16871 pat
= PATTERN (insn
);
16872 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
16873 call
= XVECEXP (pat
, 0, 0);
16875 /* Get the real call RTX if the insn sets a value, ie. returns. */
16876 if (GET_CODE (call
) == SET
)
16877 call
= SET_SRC (call
);
16879 /* Check if it is a cmse_nonsecure_call. */
16880 unspec
= XEXP (call
, 0);
16881 if (GET_CODE (unspec
) != UNSPEC
16882 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
16885 /* Determine the caller-saved registers we need to clear. */
16886 to_clear_mask
= (1LL << (NUM_ARG_REGS
)) - 1;
16887 maxregno
= NUM_ARG_REGS
- 1;
16888 /* Only look at the caller-saved floating point registers in case of
16889 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16890 lazy store and loads which clear both caller- and callee-saved
16892 if (TARGET_HARD_FLOAT_ABI
)
16894 float_mask
= (1LL << (D7_VFP_REGNUM
+ 1)) - 1;
16895 float_mask
&= ~((1LL << FIRST_VFP_REGNUM
) - 1);
16896 to_clear_mask
|= float_mask
;
16897 maxregno
= D7_VFP_REGNUM
;
16900 /* Make sure the register used to hold the function address is not
16902 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
16903 gcc_assert (MEM_P (address
));
16904 gcc_assert (REG_P (XEXP (address
, 0)));
16905 to_clear_mask
&= ~(1LL << REGNO (XEXP (address
, 0)));
16907 /* Set basic block of call insn so that df rescan is performed on
16908 insns inserted here. */
16909 set_block_for_insn (insn
, bb
);
16910 df_set_flags (DF_DEFER_INSN_RESCAN
);
16913 /* Make sure the scheduler doesn't schedule other insns beyond
16915 emit_insn (gen_blockage ());
16917 /* Walk through all arguments and clear registers appropriately.
16919 fntype
= TREE_TYPE (MEM_EXPR (address
));
16920 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
16922 args_so_far
= pack_cumulative_args (&args_so_far_v
);
16923 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
16926 machine_mode arg_mode
= TYPE_MODE (arg_type
);
16928 if (VOID_TYPE_P (arg_type
))
16932 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
,
16935 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
,
16937 gcc_assert (REG_P (arg_rtx
));
16939 &= ~compute_not_to_clear_mask (arg_type
, arg_rtx
,
16941 padding_bits_to_clear_ptr
);
16943 first_param
= false;
16946 /* Clear padding bits where needed. */
16947 cleared_reg
= XEXP (address
, 0);
16948 reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
16950 for (regno
= R0_REGNUM
; regno
< NUM_ARG_REGS
; regno
++)
16952 if (padding_bits_to_clear
[regno
] == 0)
16955 /* If this is a Thumb-1 target copy the address of the function
16956 we are calling from 'r4' into 'ip' such that we can use r4 to
16957 clear the unused bits in the arguments. */
16958 if (TARGET_THUMB1
&& !using_r4
)
16962 emit_move_insn (gen_rtx_REG (SImode
, IP_REGNUM
),
16966 tmp
= GEN_INT ((((~padding_bits_to_clear
[regno
]) << 16u) >> 16u));
16967 emit_move_insn (reg
, tmp
);
16968 /* Also fill the top half of the negated
16969 padding_bits_to_clear. */
16970 if (((~padding_bits_to_clear
[regno
]) >> 16) > 0)
16972 tmp
= GEN_INT ((~padding_bits_to_clear
[regno
]) >> 16);
16973 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg
,
16979 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, regno
),
16980 gen_rtx_REG (SImode
, regno
),
16985 emit_move_insn (cleared_reg
,
16986 gen_rtx_REG (SImode
, IP_REGNUM
));
16988 /* We use right shift and left shift to clear the LSB of the address
16989 we jump to instead of using bic, to avoid having to use an extra
16990 register on Thumb-1. */
16991 tmp
= gen_rtx_LSHIFTRT (SImode
, cleared_reg
, const1_rtx
);
16992 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
16993 tmp
= gen_rtx_ASHIFT (SImode
, cleared_reg
, const1_rtx
);
16994 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
16996 /* Clearing all registers that leak before doing a non-secure
16998 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
17000 if (!(to_clear_mask
& (1LL << regno
)))
17003 /* If regno is an even vfp register and its successor is also to
17004 be cleared, use vmov. */
17005 if (IS_VFP_REGNUM (regno
))
17007 if (TARGET_VFP_DOUBLE
17008 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
17009 && to_clear_mask
& (1LL << (regno
+ 1)))
17010 emit_move_insn (gen_rtx_REG (DFmode
, regno
++),
17011 CONST0_RTX (DFmode
));
17013 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
17014 CONST0_RTX (SFmode
));
17017 emit_move_insn (gen_rtx_REG (SImode
, regno
), cleared_reg
);
17020 seq
= get_insns ();
17022 emit_insn_before (seq
, insn
);
17028 /* Rewrite move insn into subtract of 0 if the condition codes will
17029 be useful in next conditional jump insn. */
17032 thumb1_reorg (void)
17036 FOR_EACH_BB_FN (bb
, cfun
)
17039 rtx cmp
, op0
, op1
, set
= NULL
;
17040 rtx_insn
*prev
, *insn
= BB_END (bb
);
17041 bool insn_clobbered
= false;
17043 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17044 insn
= PREV_INSN (insn
);
17046 /* Find the last cbranchsi4_insn in basic block BB. */
17047 if (insn
== BB_HEAD (bb
)
17048 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17051 /* Get the register with which we are comparing. */
17052 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17053 op0
= XEXP (cmp
, 0);
17054 op1
= XEXP (cmp
, 1);
17056 /* Check that comparison is against ZERO. */
17057 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17060 /* Find the first flag setting insn before INSN in basic block BB. */
17061 gcc_assert (insn
!= BB_HEAD (bb
));
17062 for (prev
= PREV_INSN (insn
);
17064 && prev
!= BB_HEAD (bb
)
17066 || DEBUG_INSN_P (prev
)
17067 || ((set
= single_set (prev
)) != NULL
17068 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17069 prev
= PREV_INSN (prev
))
17071 if (reg_set_p (op0
, prev
))
17072 insn_clobbered
= true;
17075 /* Skip if op0 is clobbered by insn other than prev. */
17076 if (insn_clobbered
)
17082 dest
= SET_DEST (set
);
17083 src
= SET_SRC (set
);
17084 if (!low_register_operand (dest
, SImode
)
17085 || !low_register_operand (src
, SImode
))
17088 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17089 in INSN. Both src and dest of the move insn are checked. */
17090 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17092 dest
= copy_rtx (dest
);
17093 src
= copy_rtx (src
);
17094 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17095 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17096 INSN_CODE (prev
) = -1;
17097 /* Set test register in INSN to dest. */
17098 XEXP (cmp
, 0) = copy_rtx (dest
);
17099 INSN_CODE (insn
) = -1;
17104 /* Convert instructions to their cc-clobbering variant if possible, since
17105 that allows us to use smaller encodings. */
17108 thumb2_reorg (void)
17113 INIT_REG_SET (&live
);
17115 /* We are freeing block_for_insn in the toplev to keep compatibility
17116 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17117 compute_bb_for_insn ();
17120 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17122 FOR_EACH_BB_FN (bb
, cfun
)
17124 if ((current_tune
->disparage_flag_setting_t16_encodings
17125 == tune_params::DISPARAGE_FLAGS_ALL
)
17126 && optimize_bb_for_speed_p (bb
))
17130 Convert_Action action
= SKIP
;
17131 Convert_Action action_for_partial_flag_setting
17132 = ((current_tune
->disparage_flag_setting_t16_encodings
17133 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17134 && optimize_bb_for_speed_p (bb
))
17137 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17138 df_simulate_initialize_backwards (bb
, &live
);
17139 FOR_BB_INSNS_REVERSE (bb
, insn
)
17141 if (NONJUMP_INSN_P (insn
)
17142 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17143 && GET_CODE (PATTERN (insn
)) == SET
)
17146 rtx pat
= PATTERN (insn
);
17147 rtx dst
= XEXP (pat
, 0);
17148 rtx src
= XEXP (pat
, 1);
17149 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17151 if (UNARY_P (src
) || BINARY_P (src
))
17152 op0
= XEXP (src
, 0);
17154 if (BINARY_P (src
))
17155 op1
= XEXP (src
, 1);
17157 if (low_register_operand (dst
, SImode
))
17159 switch (GET_CODE (src
))
17162 /* Adding two registers and storing the result
17163 in the first source is already a 16-bit
17165 if (rtx_equal_p (dst
, op0
)
17166 && register_operand (op1
, SImode
))
17169 if (low_register_operand (op0
, SImode
))
17171 /* ADDS <Rd>,<Rn>,<Rm> */
17172 if (low_register_operand (op1
, SImode
))
17174 /* ADDS <Rdn>,#<imm8> */
17175 /* SUBS <Rdn>,#<imm8> */
17176 else if (rtx_equal_p (dst
, op0
)
17177 && CONST_INT_P (op1
)
17178 && IN_RANGE (INTVAL (op1
), -255, 255))
17180 /* ADDS <Rd>,<Rn>,#<imm3> */
17181 /* SUBS <Rd>,<Rn>,#<imm3> */
17182 else if (CONST_INT_P (op1
)
17183 && IN_RANGE (INTVAL (op1
), -7, 7))
17186 /* ADCS <Rd>, <Rn> */
17187 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17188 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17189 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17191 && COMPARISON_P (op1
)
17192 && cc_register (XEXP (op1
, 0), VOIDmode
)
17193 && maybe_get_arm_condition_code (op1
) == ARM_CS
17194 && XEXP (op1
, 1) == const0_rtx
)
17199 /* RSBS <Rd>,<Rn>,#0
17200 Not handled here: see NEG below. */
17201 /* SUBS <Rd>,<Rn>,#<imm3>
17203 Not handled here: see PLUS above. */
17204 /* SUBS <Rd>,<Rn>,<Rm> */
17205 if (low_register_operand (op0
, SImode
)
17206 && low_register_operand (op1
, SImode
))
17211 /* MULS <Rdm>,<Rn>,<Rdm>
17212 As an exception to the rule, this is only used
17213 when optimizing for size since MULS is slow on all
17214 known implementations. We do not even want to use
17215 MULS in cold code, if optimizing for speed, so we
17216 test the global flag here. */
17217 if (!optimize_size
)
17219 /* Fall through. */
17223 /* ANDS <Rdn>,<Rm> */
17224 if (rtx_equal_p (dst
, op0
)
17225 && low_register_operand (op1
, SImode
))
17226 action
= action_for_partial_flag_setting
;
17227 else if (rtx_equal_p (dst
, op1
)
17228 && low_register_operand (op0
, SImode
))
17229 action
= action_for_partial_flag_setting
== SKIP
17230 ? SKIP
: SWAP_CONV
;
17236 /* ASRS <Rdn>,<Rm> */
17237 /* LSRS <Rdn>,<Rm> */
17238 /* LSLS <Rdn>,<Rm> */
17239 if (rtx_equal_p (dst
, op0
)
17240 && low_register_operand (op1
, SImode
))
17241 action
= action_for_partial_flag_setting
;
17242 /* ASRS <Rd>,<Rm>,#<imm5> */
17243 /* LSRS <Rd>,<Rm>,#<imm5> */
17244 /* LSLS <Rd>,<Rm>,#<imm5> */
17245 else if (low_register_operand (op0
, SImode
)
17246 && CONST_INT_P (op1
)
17247 && IN_RANGE (INTVAL (op1
), 0, 31))
17248 action
= action_for_partial_flag_setting
;
17252 /* RORS <Rdn>,<Rm> */
17253 if (rtx_equal_p (dst
, op0
)
17254 && low_register_operand (op1
, SImode
))
17255 action
= action_for_partial_flag_setting
;
17259 /* MVNS <Rd>,<Rm> */
17260 if (low_register_operand (op0
, SImode
))
17261 action
= action_for_partial_flag_setting
;
17265 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17266 if (low_register_operand (op0
, SImode
))
17271 /* MOVS <Rd>,#<imm8> */
17272 if (CONST_INT_P (src
)
17273 && IN_RANGE (INTVAL (src
), 0, 255))
17274 action
= action_for_partial_flag_setting
;
17278 /* MOVS and MOV<c> with registers have different
17279 encodings, so are not relevant here. */
17287 if (action
!= SKIP
)
17289 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17290 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17293 if (action
== SWAP_CONV
)
17295 src
= copy_rtx (src
);
17296 XEXP (src
, 0) = op1
;
17297 XEXP (src
, 1) = op0
;
17298 pat
= gen_rtx_SET (dst
, src
);
17299 vec
= gen_rtvec (2, pat
, clobber
);
17301 else /* action == CONV */
17302 vec
= gen_rtvec (2, pat
, clobber
);
17304 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17305 INSN_CODE (insn
) = -1;
17309 if (NONDEBUG_INSN_P (insn
))
17310 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17314 CLEAR_REG_SET (&live
);
17317 /* Gcc puts the pool in the wrong place for ARM, since we can only
17318 load addresses a limited distance around the pc. We do some
17319 special munging to move the constant pool values to the correct
17320 point in the code. */
17325 HOST_WIDE_INT address
= 0;
17329 cmse_nonsecure_call_clear_caller_saved ();
17332 else if (TARGET_THUMB2
)
17335 /* Ensure all insns that must be split have been split at this point.
17336 Otherwise, the pool placement code below may compute incorrect
17337 insn lengths. Note that when optimizing, all insns have already
17338 been split at this point. */
17340 split_all_insns_noflow ();
17342 minipool_fix_head
= minipool_fix_tail
= NULL
;
17344 /* The first insn must always be a note, or the code below won't
17345 scan it properly. */
17346 insn
= get_insns ();
17347 gcc_assert (NOTE_P (insn
));
17350 /* Scan all the insns and record the operands that will need fixing. */
17351 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17353 if (BARRIER_P (insn
))
17354 push_minipool_barrier (insn
, address
);
17355 else if (INSN_P (insn
))
17357 rtx_jump_table_data
*table
;
17359 note_invalid_constants (insn
, address
, true);
17360 address
+= get_attr_length (insn
);
17362 /* If the insn is a vector jump, add the size of the table
17363 and skip the table. */
17364 if (tablejump_p (insn
, NULL
, &table
))
17366 address
+= get_jump_table_size (table
);
17370 else if (LABEL_P (insn
))
17371 /* Add the worst-case padding due to alignment. We don't add
17372 the _current_ padding because the minipool insertions
17373 themselves might change it. */
17374 address
+= get_label_padding (insn
);
17377 fix
= minipool_fix_head
;
17379 /* Now scan the fixups and perform the required changes. */
17384 Mfix
* last_added_fix
;
17385 Mfix
* last_barrier
= NULL
;
17388 /* Skip any further barriers before the next fix. */
17389 while (fix
&& BARRIER_P (fix
->insn
))
17392 /* No more fixes. */
17396 last_added_fix
= NULL
;
17398 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17400 if (BARRIER_P (ftmp
->insn
))
17402 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17405 last_barrier
= ftmp
;
17407 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17410 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17413 /* If we found a barrier, drop back to that; any fixes that we
17414 could have reached but come after the barrier will now go in
17415 the next mini-pool. */
17416 if (last_barrier
!= NULL
)
17418 /* Reduce the refcount for those fixes that won't go into this
17420 for (fdel
= last_barrier
->next
;
17421 fdel
&& fdel
!= ftmp
;
17424 fdel
->minipool
->refcount
--;
17425 fdel
->minipool
= NULL
;
17428 ftmp
= last_barrier
;
17432 /* ftmp is first fix that we can't fit into this pool and
17433 there no natural barriers that we could use. Insert a
17434 new barrier in the code somewhere between the previous
17435 fix and this one, and arrange to jump around it. */
17436 HOST_WIDE_INT max_address
;
17438 /* The last item on the list of fixes must be a barrier, so
17439 we can never run off the end of the list of fixes without
17440 last_barrier being set. */
17443 max_address
= minipool_vector_head
->max_address
;
17444 /* Check that there isn't another fix that is in range that
17445 we couldn't fit into this pool because the pool was
17446 already too large: we need to put the pool before such an
17447 instruction. The pool itself may come just after the
17448 fix because create_fix_barrier also allows space for a
17449 jump instruction. */
17450 if (ftmp
->address
< max_address
)
17451 max_address
= ftmp
->address
+ 1;
17453 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17456 assign_minipool_offsets (last_barrier
);
17460 if (!BARRIER_P (ftmp
->insn
)
17461 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17468 /* Scan over the fixes we have identified for this pool, fixing them
17469 up and adding the constants to the pool itself. */
17470 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17471 this_fix
= this_fix
->next
)
17472 if (!BARRIER_P (this_fix
->insn
))
17475 = plus_constant (Pmode
,
17476 gen_rtx_LABEL_REF (VOIDmode
,
17477 minipool_vector_label
),
17478 this_fix
->minipool
->offset
);
17479 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17482 dump_minipool (last_barrier
->insn
);
17486 /* From now on we must synthesize any constants that we can't handle
17487 directly. This can happen if the RTL gets split during final
17488 instruction generation. */
17489 cfun
->machine
->after_arm_reorg
= 1;
17491 /* Free the minipool memory. */
17492 obstack_free (&minipool_obstack
, minipool_startobj
);
17495 /* Routines to output assembly language. */
17497 /* Return string representation of passed in real value. */
17498 static const char *
17499 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17501 if (!fp_consts_inited
)
17504 gcc_assert (real_equal (r
, &value_fp0
));
17508 /* OPERANDS[0] is the entire list of insns that constitute pop,
17509 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17510 is in the list, UPDATE is true iff the list contains explicit
17511 update of base register. */
17513 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17519 const char *conditional
;
17520 int num_saves
= XVECLEN (operands
[0], 0);
17521 unsigned int regno
;
17522 unsigned int regno_base
= REGNO (operands
[1]);
17523 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
17526 offset
+= update
? 1 : 0;
17527 offset
+= return_pc
? 1 : 0;
17529 /* Is the base register in the list? */
17530 for (i
= offset
; i
< num_saves
; i
++)
17532 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17533 /* If SP is in the list, then the base register must be SP. */
17534 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17535 /* If base register is in the list, there must be no explicit update. */
17536 if (regno
== regno_base
)
17537 gcc_assert (!update
);
17540 conditional
= reverse
? "%?%D0" : "%?%d0";
17541 /* Can't use POP if returning from an interrupt. */
17542 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17543 sprintf (pattern
, "pop%s\t{", conditional
);
17546 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17547 It's just a convention, their semantics are identical. */
17548 if (regno_base
== SP_REGNUM
)
17549 sprintf (pattern
, "ldmfd%s\t", conditional
);
17551 sprintf (pattern
, "ldmia%s\t", conditional
);
17553 sprintf (pattern
, "ldm%s\t", conditional
);
17555 strcat (pattern
, reg_names
[regno_base
]);
17557 strcat (pattern
, "!, {");
17559 strcat (pattern
, ", {");
17562 /* Output the first destination register. */
17564 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17566 /* Output the rest of the destination registers. */
17567 for (i
= offset
+ 1; i
< num_saves
; i
++)
17569 strcat (pattern
, ", ");
17571 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17574 strcat (pattern
, "}");
17576 if (interrupt_p
&& return_pc
)
17577 strcat (pattern
, "^");
17579 output_asm_insn (pattern
, &cond
);
17583 /* Output the assembly for a store multiple. */
17586 vfp_output_vstmd (rtx
* operands
)
17592 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17593 ? XEXP (operands
[0], 0)
17594 : XEXP (XEXP (operands
[0], 0), 0);
17595 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17598 strcpy (pattern
, "vpush%?.64\t{%P1");
17600 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17602 p
= strlen (pattern
);
17604 gcc_assert (REG_P (operands
[1]));
17606 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17607 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17609 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17611 strcpy (&pattern
[p
], "}");
17613 output_asm_insn (pattern
, operands
);
17618 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17619 number of bytes pushed. */
17622 vfp_emit_fstmd (int base_reg
, int count
)
17629 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17630 register pairs are stored by a store multiple insn. We avoid this
17631 by pushing an extra pair. */
17632 if (count
== 2 && !arm_arch6
)
17634 if (base_reg
== LAST_VFP_REGNUM
- 3)
17639 /* FSTMD may not store more than 16 doubleword registers at once. Split
17640 larger stores into multiple parts (up to a maximum of two, in
17645 /* NOTE: base_reg is an internal register number, so each D register
17647 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17648 saved
+= vfp_emit_fstmd (base_reg
, 16);
17652 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17653 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17655 reg
= gen_rtx_REG (DFmode
, base_reg
);
17658 XVECEXP (par
, 0, 0)
17659 = gen_rtx_SET (gen_frame_mem
17661 gen_rtx_PRE_MODIFY (Pmode
,
17664 (Pmode
, stack_pointer_rtx
,
17667 gen_rtx_UNSPEC (BLKmode
,
17668 gen_rtvec (1, reg
),
17669 UNSPEC_PUSH_MULT
));
17671 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17672 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17673 RTX_FRAME_RELATED_P (tmp
) = 1;
17674 XVECEXP (dwarf
, 0, 0) = tmp
;
17676 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17677 RTX_FRAME_RELATED_P (tmp
) = 1;
17678 XVECEXP (dwarf
, 0, 1) = tmp
;
17680 for (i
= 1; i
< count
; i
++)
17682 reg
= gen_rtx_REG (DFmode
, base_reg
);
17684 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17686 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17687 plus_constant (Pmode
,
17691 RTX_FRAME_RELATED_P (tmp
) = 1;
17692 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17695 par
= emit_insn (par
);
17696 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17697 RTX_FRAME_RELATED_P (par
) = 1;
17702 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17703 has the cmse_nonsecure_call attribute and returns false otherwise. */
17706 detect_cmse_nonsecure_call (tree addr
)
17711 tree fntype
= TREE_TYPE (addr
);
17712 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
17713 TYPE_ATTRIBUTES (fntype
)))
17719 /* Emit a call instruction with pattern PAT. ADDR is the address of
17720 the call target. */
17723 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17727 insn
= emit_call_insn (pat
);
17729 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17730 If the call might use such an entry, add a use of the PIC register
17731 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17732 if (TARGET_VXWORKS_RTP
17735 && GET_CODE (addr
) == SYMBOL_REF
17736 && (SYMBOL_REF_DECL (addr
)
17737 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17738 : !SYMBOL_REF_LOCAL_P (addr
)))
17740 require_pic_register ();
17741 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17744 if (TARGET_AAPCS_BASED
)
17746 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17747 linker. We need to add an IP clobber to allow setting
17748 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17749 is not needed since it's a fixed register. */
17750 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17751 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17755 /* Output a 'call' insn. */
17757 output_call (rtx
*operands
)
17759 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17761 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17762 if (REGNO (operands
[0]) == LR_REGNUM
)
17764 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17765 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17768 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17770 if (TARGET_INTERWORK
|| arm_arch4t
)
17771 output_asm_insn ("bx%?\t%0", operands
);
17773 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17778 /* Output a move from arm registers to arm registers of a long double
17779 OPERANDS[0] is the destination.
17780 OPERANDS[1] is the source. */
17782 output_mov_long_double_arm_from_arm (rtx
*operands
)
17784 /* We have to be careful here because the two might overlap. */
17785 int dest_start
= REGNO (operands
[0]);
17786 int src_start
= REGNO (operands
[1]);
17790 if (dest_start
< src_start
)
17792 for (i
= 0; i
< 3; i
++)
17794 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17795 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17796 output_asm_insn ("mov%?\t%0, %1", ops
);
17801 for (i
= 2; i
>= 0; i
--)
17803 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17804 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17805 output_asm_insn ("mov%?\t%0, %1", ops
);
17813 arm_emit_movpair (rtx dest
, rtx src
)
17815 /* If the src is an immediate, simplify it. */
17816 if (CONST_INT_P (src
))
17818 HOST_WIDE_INT val
= INTVAL (src
);
17819 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17820 if ((val
>> 16) & 0x0000ffff)
17822 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17824 GEN_INT ((val
>> 16) & 0x0000ffff));
17825 rtx_insn
*insn
= get_last_insn ();
17826 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17830 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17831 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17832 rtx_insn
*insn
= get_last_insn ();
17833 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17836 /* Output a move between double words. It must be REG<-MEM
17839 output_move_double (rtx
*operands
, bool emit
, int *count
)
17841 enum rtx_code code0
= GET_CODE (operands
[0]);
17842 enum rtx_code code1
= GET_CODE (operands
[1]);
17847 /* The only case when this might happen is when
17848 you are looking at the length of a DImode instruction
17849 that has an invalid constant in it. */
17850 if (code0
== REG
&& code1
!= MEM
)
17852 gcc_assert (!emit
);
17859 unsigned int reg0
= REGNO (operands
[0]);
17861 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17863 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17865 switch (GET_CODE (XEXP (operands
[1], 0)))
17872 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17873 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
17875 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
17880 gcc_assert (TARGET_LDRD
);
17882 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
17889 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
17891 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
17899 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
17901 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
17906 gcc_assert (TARGET_LDRD
);
17908 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
17913 /* Autoicrement addressing modes should never have overlapping
17914 base and destination registers, and overlapping index registers
17915 are already prohibited, so this doesn't need to worry about
17917 otherops
[0] = operands
[0];
17918 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17919 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17921 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17923 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17925 /* Registers overlap so split out the increment. */
17928 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17929 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
17936 /* Use a single insn if we can.
17937 FIXME: IWMMXT allows offsets larger than ldrd can
17938 handle, fix these up with a pair of ldr. */
17940 || !CONST_INT_P (otherops
[2])
17941 || (INTVAL (otherops
[2]) > -256
17942 && INTVAL (otherops
[2]) < 256))
17945 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
17951 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17952 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17962 /* Use a single insn if we can.
17963 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17964 fix these up with a pair of ldr. */
17966 || !CONST_INT_P (otherops
[2])
17967 || (INTVAL (otherops
[2]) > -256
17968 && INTVAL (otherops
[2]) < 256))
17971 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
17977 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17978 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
17988 /* We might be able to use ldrd %0, %1 here. However the range is
17989 different to ldr/adr, and it is broken on some ARMv7-M
17990 implementations. */
17991 /* Use the second register of the pair to avoid problematic
17993 otherops
[1] = operands
[1];
17995 output_asm_insn ("adr%?\t%0, %1", otherops
);
17996 operands
[1] = otherops
[0];
18000 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18002 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18009 /* ??? This needs checking for thumb2. */
18011 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18012 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18014 otherops
[0] = operands
[0];
18015 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18016 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18018 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18020 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18022 switch ((int) INTVAL (otherops
[2]))
18026 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18032 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18038 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18042 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18043 operands
[1] = otherops
[0];
18045 && (REG_P (otherops
[2])
18047 || (CONST_INT_P (otherops
[2])
18048 && INTVAL (otherops
[2]) > -256
18049 && INTVAL (otherops
[2]) < 256)))
18051 if (reg_overlap_mentioned_p (operands
[0],
18054 /* Swap base and index registers over to
18055 avoid a conflict. */
18056 std::swap (otherops
[1], otherops
[2]);
18058 /* If both registers conflict, it will usually
18059 have been fixed by a splitter. */
18060 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18061 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18065 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18066 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18073 otherops
[0] = operands
[0];
18075 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18080 if (CONST_INT_P (otherops
[2]))
18084 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18085 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18087 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18093 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18099 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18106 return "ldrd%?\t%0, [%1]";
18108 return "ldmia%?\t%1, %M0";
18112 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18113 /* Take care of overlapping base/data reg. */
18114 if (reg_mentioned_p (operands
[0], operands
[1]))
18118 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18119 output_asm_insn ("ldr%?\t%0, %1", operands
);
18129 output_asm_insn ("ldr%?\t%0, %1", operands
);
18130 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18140 /* Constraints should ensure this. */
18141 gcc_assert (code0
== MEM
&& code1
== REG
);
18142 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18143 || (TARGET_ARM
&& TARGET_LDRD
));
18145 switch (GET_CODE (XEXP (operands
[0], 0)))
18151 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18153 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18158 gcc_assert (TARGET_LDRD
);
18160 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18167 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18169 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18177 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18179 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18184 gcc_assert (TARGET_LDRD
);
18186 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18191 otherops
[0] = operands
[1];
18192 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18193 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18195 /* IWMMXT allows offsets larger than ldrd can handle,
18196 fix these up with a pair of ldr. */
18198 && CONST_INT_P (otherops
[2])
18199 && (INTVAL(otherops
[2]) <= -256
18200 || INTVAL(otherops
[2]) >= 256))
18202 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18206 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18207 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18216 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18217 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18223 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18226 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18231 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18236 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18237 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18239 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18243 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18250 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18257 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18262 && (REG_P (otherops
[2])
18264 || (CONST_INT_P (otherops
[2])
18265 && INTVAL (otherops
[2]) > -256
18266 && INTVAL (otherops
[2]) < 256)))
18268 otherops
[0] = operands
[1];
18269 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18271 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18277 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18278 otherops
[1] = operands
[1];
18281 output_asm_insn ("str%?\t%1, %0", operands
);
18282 output_asm_insn ("str%?\t%H1, %0", otherops
);
18292 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18293 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18296 output_move_quad (rtx
*operands
)
18298 if (REG_P (operands
[0]))
18300 /* Load, or reg->reg move. */
18302 if (MEM_P (operands
[1]))
18304 switch (GET_CODE (XEXP (operands
[1], 0)))
18307 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18312 output_asm_insn ("adr%?\t%0, %1", operands
);
18313 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18317 gcc_unreachable ();
18325 gcc_assert (REG_P (operands
[1]));
18327 dest
= REGNO (operands
[0]);
18328 src
= REGNO (operands
[1]);
18330 /* This seems pretty dumb, but hopefully GCC won't try to do it
18333 for (i
= 0; i
< 4; i
++)
18335 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18336 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18337 output_asm_insn ("mov%?\t%0, %1", ops
);
18340 for (i
= 3; i
>= 0; i
--)
18342 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18343 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18344 output_asm_insn ("mov%?\t%0, %1", ops
);
18350 gcc_assert (MEM_P (operands
[0]));
18351 gcc_assert (REG_P (operands
[1]));
18352 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18354 switch (GET_CODE (XEXP (operands
[0], 0)))
18357 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18361 gcc_unreachable ();
18368 /* Output a VFP load or store instruction. */
18371 output_move_vfp (rtx
*operands
)
18373 rtx reg
, mem
, addr
, ops
[2];
18374 int load
= REG_P (operands
[0]);
18375 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18376 int sp
= (!TARGET_VFP_FP16INST
18377 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18378 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18383 reg
= operands
[!load
];
18384 mem
= operands
[load
];
18386 mode
= GET_MODE (reg
);
18388 gcc_assert (REG_P (reg
));
18389 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18390 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18396 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18397 gcc_assert (MEM_P (mem
));
18399 addr
= XEXP (mem
, 0);
18401 switch (GET_CODE (addr
))
18404 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18405 ops
[0] = XEXP (addr
, 0);
18410 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18411 ops
[0] = XEXP (addr
, 0);
18416 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18422 sprintf (buff
, templ
,
18423 load
? "ld" : "st",
18424 dp
? "64" : sp
? "32" : "16",
18426 integer_p
? "\t%@ int" : "");
18427 output_asm_insn (buff
, ops
);
18432 /* Output a Neon double-word or quad-word load or store, or a load
18433 or store for larger structure modes.
18435 WARNING: The ordering of elements is weird in big-endian mode,
18436 because the EABI requires that vectors stored in memory appear
18437 as though they were stored by a VSTM, as required by the EABI.
18438 GCC RTL defines element ordering based on in-memory order.
18439 This can be different from the architectural ordering of elements
18440 within a NEON register. The intrinsics defined in arm_neon.h use the
18441 NEON register element ordering, not the GCC RTL element ordering.
18443 For example, the in-memory ordering of a big-endian a quadword
18444 vector with 16-bit elements when stored from register pair {d0,d1}
18445 will be (lowest address first, d0[N] is NEON register element N):
18447 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18449 When necessary, quadword registers (dN, dN+1) are moved to ARM
18450 registers from rN in the order:
18452 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18454 So that STM/LDM can be used on vectors in ARM registers, and the
18455 same memory layout will result as if VSTM/VLDM were used.
18457 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18458 possible, which allows use of appropriate alignment tags.
18459 Note that the choice of "64" is independent of the actual vector
18460 element size; this size simply ensures that the behavior is
18461 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18463 Due to limitations of those instructions, use of VST1.64/VLD1.64
18464 is not possible if:
18465 - the address contains PRE_DEC, or
18466 - the mode refers to more than 4 double-word registers
18468 In those cases, it would be possible to replace VSTM/VLDM by a
18469 sequence of instructions; this is not currently implemented since
18470 this is not certain to actually improve performance. */
18473 output_move_neon (rtx
*operands
)
18475 rtx reg
, mem
, addr
, ops
[2];
18476 int regno
, nregs
, load
= REG_P (operands
[0]);
18481 reg
= operands
[!load
];
18482 mem
= operands
[load
];
18484 mode
= GET_MODE (reg
);
18486 gcc_assert (REG_P (reg
));
18487 regno
= REGNO (reg
);
18488 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18489 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18490 || NEON_REGNO_OK_FOR_QUAD (regno
));
18491 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18492 || VALID_NEON_QREG_MODE (mode
)
18493 || VALID_NEON_STRUCT_MODE (mode
));
18494 gcc_assert (MEM_P (mem
));
18496 addr
= XEXP (mem
, 0);
18498 /* Strip off const from addresses like (const (plus (...))). */
18499 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18500 addr
= XEXP (addr
, 0);
18502 switch (GET_CODE (addr
))
18505 /* We have to use vldm / vstm for too-large modes. */
18508 templ
= "v%smia%%?\t%%0!, %%h1";
18509 ops
[0] = XEXP (addr
, 0);
18513 templ
= "v%s1.64\t%%h1, %%A0";
18520 /* We have to use vldm / vstm in this case, since there is no
18521 pre-decrement form of the vld1 / vst1 instructions. */
18522 templ
= "v%smdb%%?\t%%0!, %%h1";
18523 ops
[0] = XEXP (addr
, 0);
18528 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18529 gcc_unreachable ();
18532 /* We have to use vldm / vstm for too-large modes. */
18536 templ
= "v%smia%%?\t%%m0, %%h1";
18538 templ
= "v%s1.64\t%%h1, %%A0";
18544 /* Fall through. */
18550 for (i
= 0; i
< nregs
; i
++)
18552 /* We're only using DImode here because it's a convenient size. */
18553 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18554 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18555 if (reg_overlap_mentioned_p (ops
[0], mem
))
18557 gcc_assert (overlap
== -1);
18562 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18563 output_asm_insn (buff
, ops
);
18568 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18569 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18570 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18571 output_asm_insn (buff
, ops
);
18578 gcc_unreachable ();
18581 sprintf (buff
, templ
, load
? "ld" : "st");
18582 output_asm_insn (buff
, ops
);
18587 /* Compute and return the length of neon_mov<mode>, where <mode> is
18588 one of VSTRUCT modes: EI, OI, CI or XI. */
18590 arm_attr_length_move_neon (rtx_insn
*insn
)
18592 rtx reg
, mem
, addr
;
18596 extract_insn_cached (insn
);
18598 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18600 mode
= GET_MODE (recog_data
.operand
[0]);
18611 gcc_unreachable ();
18615 load
= REG_P (recog_data
.operand
[0]);
18616 reg
= recog_data
.operand
[!load
];
18617 mem
= recog_data
.operand
[load
];
18619 gcc_assert (MEM_P (mem
));
18621 mode
= GET_MODE (reg
);
18622 addr
= XEXP (mem
, 0);
18624 /* Strip off const from addresses like (const (plus (...))). */
18625 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18626 addr
= XEXP (addr
, 0);
18628 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18630 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18637 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18641 arm_address_offset_is_imm (rtx_insn
*insn
)
18645 extract_insn_cached (insn
);
18647 if (REG_P (recog_data
.operand
[0]))
18650 mem
= recog_data
.operand
[0];
18652 gcc_assert (MEM_P (mem
));
18654 addr
= XEXP (mem
, 0);
18657 || (GET_CODE (addr
) == PLUS
18658 && REG_P (XEXP (addr
, 0))
18659 && CONST_INT_P (XEXP (addr
, 1))))
18665 /* Output an ADD r, s, #n where n may be too big for one instruction.
18666 If adding zero to one register, output nothing. */
18668 output_add_immediate (rtx
*operands
)
18670 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18672 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18675 output_multi_immediate (operands
,
18676 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18679 output_multi_immediate (operands
,
18680 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18687 /* Output a multiple immediate operation.
18688 OPERANDS is the vector of operands referred to in the output patterns.
18689 INSTR1 is the output pattern to use for the first constant.
18690 INSTR2 is the output pattern to use for subsequent constants.
18691 IMMED_OP is the index of the constant slot in OPERANDS.
18692 N is the constant value. */
18693 static const char *
18694 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18695 int immed_op
, HOST_WIDE_INT n
)
18697 #if HOST_BITS_PER_WIDE_INT > 32
18703 /* Quick and easy output. */
18704 operands
[immed_op
] = const0_rtx
;
18705 output_asm_insn (instr1
, operands
);
18710 const char * instr
= instr1
;
18712 /* Note that n is never zero here (which would give no output). */
18713 for (i
= 0; i
< 32; i
+= 2)
18717 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18718 output_asm_insn (instr
, operands
);
18728 /* Return the name of a shifter operation. */
18729 static const char *
18730 arm_shift_nmem(enum rtx_code code
)
18735 return ARM_LSL_NAME
;
18751 /* Return the appropriate ARM instruction for the operation code.
18752 The returned result should not be overwritten. OP is the rtx of the
18753 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18756 arithmetic_instr (rtx op
, int shift_first_arg
)
18758 switch (GET_CODE (op
))
18764 return shift_first_arg
? "rsb" : "sub";
18779 return arm_shift_nmem(GET_CODE(op
));
18782 gcc_unreachable ();
18786 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18787 for the operation code. The returned result should not be overwritten.
18788 OP is the rtx code of the shift.
18789 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18791 static const char *
18792 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18795 enum rtx_code code
= GET_CODE (op
);
18800 if (!CONST_INT_P (XEXP (op
, 1)))
18802 output_operand_lossage ("invalid shift operand");
18807 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18815 mnem
= arm_shift_nmem(code
);
18816 if (CONST_INT_P (XEXP (op
, 1)))
18818 *amountp
= INTVAL (XEXP (op
, 1));
18820 else if (REG_P (XEXP (op
, 1)))
18827 output_operand_lossage ("invalid shift operand");
18833 /* We never have to worry about the amount being other than a
18834 power of 2, since this case can never be reloaded from a reg. */
18835 if (!CONST_INT_P (XEXP (op
, 1)))
18837 output_operand_lossage ("invalid shift operand");
18841 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18843 /* Amount must be a power of two. */
18844 if (*amountp
& (*amountp
- 1))
18846 output_operand_lossage ("invalid shift operand");
18850 *amountp
= exact_log2 (*amountp
);
18851 gcc_assert (IN_RANGE (*amountp
, 0, 31));
18852 return ARM_LSL_NAME
;
18855 output_operand_lossage ("invalid shift operand");
18859 /* This is not 100% correct, but follows from the desire to merge
18860 multiplication by a power of 2 with the recognizer for a
18861 shift. >=32 is not a valid shift for "lsl", so we must try and
18862 output a shift that produces the correct arithmetical result.
18863 Using lsr #32 is identical except for the fact that the carry bit
18864 is not set correctly if we set the flags; but we never use the
18865 carry bit from such an operation, so we can ignore that. */
18866 if (code
== ROTATERT
)
18867 /* Rotate is just modulo 32. */
18869 else if (*amountp
!= (*amountp
& 31))
18871 if (code
== ASHIFT
)
18876 /* Shifts of 0 are no-ops. */
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      /* Start a fresh .ascii directive once the current line is full.  */
      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  /* Non-printable byte: octal escape, four characters of output.  */
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18939 /* Compute the register save mask for registers 0 through 12
18940 inclusive. This code is used by arm_compute_save_reg_mask. */
18942 static unsigned long
18943 arm_compute_save_reg0_reg12_mask (void)
18945 unsigned long func_type
= arm_current_func_type ();
18946 unsigned long save_reg_mask
= 0;
18949 if (IS_INTERRUPT (func_type
))
18951 unsigned int max_reg
;
18952 /* Interrupt functions must not corrupt any registers,
18953 even call clobbered ones. If this is a leaf function
18954 we can just examine the registers used by the RTL, but
18955 otherwise we have to assume that whatever function is
18956 called might clobber anything, and so we have to save
18957 all the call-clobbered registers as well. */
18958 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18959 /* FIQ handlers have registers r8 - r12 banked, so
18960 we only need to check r0 - r7, Normal ISRs only
18961 bank r14 and r15, so we must check up to r12.
18962 r13 is the stack pointer which is always preserved,
18963 so we do not need to consider it here. */
18968 for (reg
= 0; reg
<= max_reg
; reg
++)
18969 if (df_regs_ever_live_p (reg
)
18970 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18971 save_reg_mask
|= (1 << reg
);
18973 /* Also save the pic base register if necessary. */
18975 && !TARGET_SINGLE_PIC_BASE
18976 && arm_pic_register
!= INVALID_REGNUM
18977 && crtl
->uses_pic_offset_table
)
18978 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18980 else if (IS_VOLATILE(func_type
))
18982 /* For noreturn functions we historically omitted register saves
18983 altogether. However this really messes up debugging. As a
18984 compromise save just the frame pointers. Combined with the link
18985 register saved elsewhere this should be sufficient to get
18987 if (frame_pointer_needed
)
18988 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18989 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
18990 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18991 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
18992 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
18996 /* In the normal case we only need to save those registers
18997 which are call saved and which are used by this function. */
18998 for (reg
= 0; reg
<= 11; reg
++)
18999 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19000 save_reg_mask
|= (1 << reg
);
19002 /* Handle the frame pointer as a special case. */
19003 if (frame_pointer_needed
)
19004 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19006 /* If we aren't loading the PIC register,
19007 don't stack it even though it may be live. */
19009 && !TARGET_SINGLE_PIC_BASE
19010 && arm_pic_register
!= INVALID_REGNUM
19011 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19012 || crtl
->uses_pic_offset_table
))
19013 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19015 /* The prologue will copy SP into R0, so save it. */
19016 if (IS_STACKALIGN (func_type
))
19017 save_reg_mask
|= 1;
19020 /* Save registers so the exception handler can modify them. */
19021 if (crtl
->calls_eh_return
)
19027 reg
= EH_RETURN_DATA_REGNO (i
);
19028 if (reg
== INVALID_REGNUM
)
19030 save_reg_mask
|= 1 << reg
;
19034 return save_reg_mask
;
19037 /* Return true if r3 is live at the start of the function. */
19040 arm_r3_live_at_start_p (void)
19042 /* Just look at cfg info, which is still close enough to correct at this
19043 point. This gives false positives for broken functions that might use
19044 uninitialized data that happens to be allocated in r3, but who cares? */
19045 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19048 /* Compute the number of bytes used to store the static chain register on the
19049 stack, above the stack frame. We need to know this accurately to get the
19050 alignment of the rest of the stack frame correct. */
19053 arm_compute_static_chain_stack_bytes (void)
19055 /* See the defining assertion in arm_expand_prologue. */
19056 if (IS_NESTED (arm_current_func_type ())
19057 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19058 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19059 && !df_regs_ever_live_p (LR_REGNUM
)))
19060 && arm_r3_live_at_start_p ()
19061 && crtl
->args
.pretend_args_size
== 0)
19067 /* Compute a bit mask of which registers need to be
19068 saved on the stack for the current function.
19069 This is used by arm_get_frame_offsets, which may add extra registers. */
19071 static unsigned long
19072 arm_compute_save_reg_mask (void)
19074 unsigned int save_reg_mask
= 0;
19075 unsigned long func_type
= arm_current_func_type ();
19078 if (IS_NAKED (func_type
))
19079 /* This should never really happen. */
19082 /* If we are creating a stack frame, then we must save the frame pointer,
19083 IP (which will hold the old stack pointer), LR and the PC. */
19084 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19086 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19089 | (1 << PC_REGNUM
);
19091 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19093 /* Decide if we need to save the link register.
19094 Interrupt routines have their own banked link register,
19095 so they never need to save it.
19096 Otherwise if we do not use the link register we do not need to save
19097 it. If we are pushing other registers onto the stack however, we
19098 can save an instruction in the epilogue by pushing the link register
19099 now and then popping it back into the PC. This incurs extra memory
19100 accesses though, so we only do it when optimizing for size, and only
19101 if we know that we will not need a fancy return sequence. */
19102 if (df_regs_ever_live_p (LR_REGNUM
)
19105 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19106 && !crtl
->tail_call_emit
19107 && !crtl
->calls_eh_return
))
19108 save_reg_mask
|= 1 << LR_REGNUM
;
19110 if (cfun
->machine
->lr_save_eliminated
)
19111 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19113 if (TARGET_REALLY_IWMMXT
19114 && ((bit_count (save_reg_mask
)
19115 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19116 arm_compute_static_chain_stack_bytes())
19119 /* The total number of registers that are going to be pushed
19120 onto the stack is odd. We need to ensure that the stack
19121 is 64-bit aligned before we start to save iWMMXt registers,
19122 and also before we start to create locals. (A local variable
19123 might be a double or long long which we will load/store using
19124 an iWMMXt instruction). Therefore we need to push another
19125 ARM register, so that the stack will be 64-bit aligned. We
19126 try to avoid using the arg registers (r0 -r3) as they might be
19127 used to pass values in a tail call. */
19128 for (reg
= 4; reg
<= 12; reg
++)
19129 if ((save_reg_mask
& (1 << reg
)) == 0)
19133 save_reg_mask
|= (1 << reg
);
19136 cfun
->machine
->sibcall_blocked
= 1;
19137 save_reg_mask
|= (1 << 3);
19141 /* We may need to push an additional register for use initializing the
19142 PIC base register. */
19143 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19144 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19146 reg
= thumb_find_work_register (1 << 4);
19147 if (!call_used_regs
[reg
])
19148 save_reg_mask
|= (1 << reg
);
19151 return save_reg_mask
;
19154 /* Compute a bit mask of which registers need to be
19155 saved on the stack for the current function. */
19156 static unsigned long
19157 thumb1_compute_save_reg_mask (void)
19159 unsigned long mask
;
19163 for (reg
= 0; reg
< 12; reg
++)
19164 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19167 /* Handle the frame pointer as a special case. */
19168 if (frame_pointer_needed
)
19169 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19172 && !TARGET_SINGLE_PIC_BASE
19173 && arm_pic_register
!= INVALID_REGNUM
19174 && crtl
->uses_pic_offset_table
)
19175 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19177 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19178 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19179 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19181 /* LR will also be pushed if any lo regs are pushed. */
19182 if (mask
& 0xff || thumb_force_lr_save ())
19183 mask
|= (1 << LR_REGNUM
);
19185 /* Make sure we have a low work register if we need one.
19186 We will need one if we are going to push a high register,
19187 but we are not currently intending to push a low register. */
19188 if ((mask
& 0xff) == 0
19189 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19191 /* Use thumb_find_work_register to choose which register
19192 we will use. If the register is live then we will
19193 have to push it. Use LAST_LO_REGNUM as our fallback
19194 choice for the register to select. */
19195 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19196 /* Make sure the register returned by thumb_find_work_register is
19197 not part of the return value. */
19198 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19199 reg
= LAST_LO_REGNUM
;
19201 if (callee_saved_reg_p (reg
))
19205 /* The 504 below is 8 bytes less than 512 because there are two possible
19206 alignment words. We can't tell here if they will be present or not so we
19207 have to play it safe and assume that they are. */
19208 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19209 ROUND_UP_WORD (get_frame_size ()) +
19210 crtl
->outgoing_args_size
) >= 504)
19212 /* This is the same as the code in thumb1_expand_prologue() which
19213 determines which register to use for stack decrement. */
19214 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19215 if (mask
& (1 << reg
))
19218 if (reg
> LAST_LO_REGNUM
)
19220 /* Make sure we have a register available for stack decrement. */
19221 mask
|= 1 << LAST_LO_REGNUM
;
19229 /* Return the number of bytes required to save VFP registers. */
19231 arm_get_vfp_saved_size (void)
19233 unsigned int regno
;
19238 /* Space for saved VFP registers. */
19239 if (TARGET_HARD_FLOAT
)
19242 for (regno
= FIRST_VFP_REGNUM
;
19243 regno
< LAST_VFP_REGNUM
;
19246 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19247 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19251 /* Workaround ARM10 VFPr1 bug. */
19252 if (count
== 2 && !arm_arch6
)
19254 saved
+= count
* 8;
19263 if (count
== 2 && !arm_arch6
)
19265 saved
+= count
* 8;
19272 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19273 everything bar the final return instruction. If simple_return is true,
19274 then do not output epilogue, because it has already been emitted in RTL. */
19276 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19277 bool simple_return
)
19279 char conditional
[10];
19282 unsigned long live_regs_mask
;
19283 unsigned long func_type
;
19284 arm_stack_offsets
*offsets
;
19286 func_type
= arm_current_func_type ();
19288 if (IS_NAKED (func_type
))
19291 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19293 /* If this function was declared non-returning, and we have
19294 found a tail call, then we have to trust that the called
19295 function won't return. */
19300 /* Otherwise, trap an attempted return by aborting. */
19302 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19304 assemble_external_libcall (ops
[1]);
19305 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19311 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19313 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19315 cfun
->machine
->return_used_this_function
= 1;
19317 offsets
= arm_get_frame_offsets ();
19318 live_regs_mask
= offsets
->saved_regs_mask
;
19320 if (!simple_return
&& live_regs_mask
)
19322 const char * return_reg
;
19324 /* If we do not have any special requirements for function exit
19325 (e.g. interworking) then we can load the return address
19326 directly into the PC. Otherwise we must load it into LR. */
19328 && !IS_CMSE_ENTRY (func_type
)
19329 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19330 return_reg
= reg_names
[PC_REGNUM
];
19332 return_reg
= reg_names
[LR_REGNUM
];
19334 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19336 /* There are three possible reasons for the IP register
19337 being saved. 1) a stack frame was created, in which case
19338 IP contains the old stack pointer, or 2) an ISR routine
19339 corrupted it, or 3) it was saved to align the stack on
19340 iWMMXt. In case 1, restore IP into SP, otherwise just
19342 if (frame_pointer_needed
)
19344 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19345 live_regs_mask
|= (1 << SP_REGNUM
);
19348 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19351 /* On some ARM architectures it is faster to use LDR rather than
19352 LDM to load a single register. On other architectures, the
19353 cost is the same. In 26 bit mode, or for exception handlers,
19354 we have to use LDM to load the PC so that the CPSR is also
19356 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19357 if (live_regs_mask
== (1U << reg
))
19360 if (reg
<= LAST_ARM_REGNUM
19361 && (reg
!= LR_REGNUM
19363 || ! IS_INTERRUPT (func_type
)))
19365 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19366 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19373 /* Generate the load multiple instruction to restore the
19374 registers. Note we can get here, even if
19375 frame_pointer_needed is true, but only if sp already
19376 points to the base of the saved core registers. */
19377 if (live_regs_mask
& (1 << SP_REGNUM
))
19379 unsigned HOST_WIDE_INT stack_adjust
;
19381 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19382 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19384 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19385 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19388 /* If we can't use ldmib (SA110 bug),
19389 then try to pop r3 instead. */
19391 live_regs_mask
|= 1 << 3;
19393 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19396 /* For interrupt returns we have to use an LDM rather than
19397 a POP so that we can use the exception return variant. */
19398 else if (IS_INTERRUPT (func_type
))
19399 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19401 sprintf (instr
, "pop%s\t{", conditional
);
19403 p
= instr
+ strlen (instr
);
19405 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19406 if (live_regs_mask
& (1 << reg
))
19408 int l
= strlen (reg_names
[reg
]);
19414 memcpy (p
, ", ", 2);
19418 memcpy (p
, "%|", 2);
19419 memcpy (p
+ 2, reg_names
[reg
], l
);
19423 if (live_regs_mask
& (1 << LR_REGNUM
))
19425 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19426 /* If returning from an interrupt, restore the CPSR. */
19427 if (IS_INTERRUPT (func_type
))
19434 output_asm_insn (instr
, & operand
);
19436 /* See if we need to generate an extra instruction to
19437 perform the actual function return. */
19439 && func_type
!= ARM_FT_INTERWORKED
19440 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19442 /* The return has already been handled
19443 by loading the LR into the PC. */
19450 switch ((int) ARM_FUNC_TYPE (func_type
))
19454 /* ??? This is wrong for unified assembly syntax. */
19455 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19458 case ARM_FT_INTERWORKED
:
19459 gcc_assert (arm_arch5
|| arm_arch4t
);
19460 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19463 case ARM_FT_EXCEPTION
:
19464 /* ??? This is wrong for unified assembly syntax. */
19465 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19469 if (IS_CMSE_ENTRY (func_type
))
19471 /* Check if we have to clear the 'GE bits' which is only used if
19472 parallel add and subtraction instructions are available. */
19473 if (TARGET_INT_SIMD
)
19474 snprintf (instr
, sizeof (instr
),
19475 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
19477 snprintf (instr
, sizeof (instr
),
19478 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
19480 output_asm_insn (instr
, & operand
);
19481 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
19483 /* Clear the cumulative exception-status bits (0-4,7) and the
19484 condition code bits (28-31) of the FPSCR. We need to
19485 remember to clear the first scratch register used (IP) and
19486 save and restore the second (r4). */
19487 snprintf (instr
, sizeof (instr
), "push\t{%%|r4}");
19488 output_asm_insn (instr
, & operand
);
19489 snprintf (instr
, sizeof (instr
), "vmrs\t%%|ip, fpscr");
19490 output_asm_insn (instr
, & operand
);
19491 snprintf (instr
, sizeof (instr
), "movw\t%%|r4, #65376");
19492 output_asm_insn (instr
, & operand
);
19493 snprintf (instr
, sizeof (instr
), "movt\t%%|r4, #4095");
19494 output_asm_insn (instr
, & operand
);
19495 snprintf (instr
, sizeof (instr
), "and\t%%|ip, %%|r4");
19496 output_asm_insn (instr
, & operand
);
19497 snprintf (instr
, sizeof (instr
), "vmsr\tfpscr, %%|ip");
19498 output_asm_insn (instr
, & operand
);
19499 snprintf (instr
, sizeof (instr
), "pop\t{%%|r4}");
19500 output_asm_insn (instr
, & operand
);
19501 snprintf (instr
, sizeof (instr
), "mov\t%%|ip, %%|lr");
19502 output_asm_insn (instr
, & operand
);
19504 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
19506 /* Use bx if it's available. */
19507 else if (arm_arch5
|| arm_arch4t
)
19508 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19510 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19514 output_asm_insn (instr
, & operand
);
19520 /* Output in FILE asm statements needed to declare the NAME of the function
19521 defined by its DECL node. */
19524 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
19526 size_t cmse_name_len
;
19527 char *cmse_name
= 0;
19528 char cmse_prefix
[] = "__acle_se_";
19530 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19531 extra function label for each function with the 'cmse_nonsecure_entry'
19532 attribute. This extra function label should be prepended with
19533 '__acle_se_', telling the linker that it needs to create secure gateway
19534 veneers for this function. */
19535 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
19536 DECL_ATTRIBUTES (decl
)))
19538 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
19539 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
19540 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
19541 targetm
.asm_out
.globalize_label (file
, cmse_name
);
19543 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
19544 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
19547 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
19548 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
19549 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
19550 ASM_OUTPUT_LABEL (file
, name
);
19553 ASM_OUTPUT_LABEL (file
, cmse_name
);
19555 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
19558 /* Write the function name into the code section, directly preceding
19559 the function prologue.
19561 Code will be output similar to this:
19563 .ascii "arm_poke_function_name", 0
19566 .word 0xff000000 + (t1 - t0)
19567 arm_poke_function_name
19569 stmfd sp!, {fp, ip, lr, pc}
19572 When performing a stack backtrace, code can inspect the value
19573 of 'pc' stored at 'fp' + 0. If the trace function then looks
19574 at location pc - 12 and the top 8 bits are set, then we know
19575 that there is a function name embedded immediately preceding this
19576 location and has length ((pc[-3]) & 0xff000000).
19578 We assume that pc is declared as a pointer to an unsigned long.
19580 It is of no benefit to output the function name if we are assembling
19581 a leaf function. These function types will not contain a stack
19582 backtrace structure, therefore it is not possible to determine the
19585 arm_poke_function_name (FILE *stream
, const char *name
)
19587 unsigned long alignlength
;
19588 unsigned long length
;
19591 length
= strlen (name
) + 1;
19592 alignlength
= ROUND_UP_WORD (length
);
19594 ASM_OUTPUT_ASCII (stream
, name
, length
);
19595 ASM_OUTPUT_ALIGN (stream
, 2);
19596 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19597 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19600 /* Place some comments into the assembler stream
19601 describing the current function. */
19603 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19605 unsigned long func_type
;
19607 /* Sanity check. */
19608 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19610 func_type
= arm_current_func_type ();
19612 switch ((int) ARM_FUNC_TYPE (func_type
))
19615 case ARM_FT_NORMAL
:
19617 case ARM_FT_INTERWORKED
:
19618 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19621 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19624 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19626 case ARM_FT_EXCEPTION
:
19627 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19631 if (IS_NAKED (func_type
))
19632 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19634 if (IS_VOLATILE (func_type
))
19635 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19637 if (IS_NESTED (func_type
))
19638 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19639 if (IS_STACKALIGN (func_type
))
19640 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19641 if (IS_CMSE_ENTRY (func_type
))
19642 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
19644 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19646 crtl
->args
.pretend_args_size
, frame_size
);
19648 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19649 frame_pointer_needed
,
19650 cfun
->machine
->uses_anonymous_args
);
19652 if (cfun
->machine
->lr_save_eliminated
)
19653 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19655 if (crtl
->calls_eh_return
)
19656 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19661 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19662 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19664 arm_stack_offsets
*offsets
;
19670 /* Emit any call-via-reg trampolines that are needed for v4t support
19671 of call_reg and call_value_reg type insns. */
19672 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19674 rtx label
= cfun
->machine
->call_via
[regno
];
19678 switch_to_section (function_section (current_function_decl
));
19679 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19680 CODE_LABEL_NUMBER (label
));
19681 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19685 /* ??? Probably not safe to set this here, since it assumes that a
19686 function will be emitted as assembly immediately after we generate
19687 RTL for it. This does not happen for inline functions. */
19688 cfun
->machine
->return_used_this_function
= 0;
19690 else /* TARGET_32BIT */
19692 /* We need to take into account any stack-frame rounding. */
19693 offsets
= arm_get_frame_offsets ();
19695 gcc_assert (!use_return_insn (FALSE
, NULL
)
19696 || (cfun
->machine
->return_used_this_function
!= 0)
19697 || offsets
->saved_regs
== offsets
->outgoing_args
19698 || frame_pointer_needed
);
19702 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19703 STR and STRD. If an even number of registers are being pushed, one
19704 or more STRD patterns are created for each register pair. If an
19705 odd number of registers are pushed, emit an initial STR followed by
19706 as many STRD instructions as are needed. This works best when the
19707 stack is initially 64-bit aligned (the normal case), since it
19708 ensures that each STRD is also 64-bit aligned. */
19710 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19715 rtx par
= NULL_RTX
;
19716 rtx dwarf
= NULL_RTX
;
19720 num_regs
= bit_count (saved_regs_mask
);
19722 /* Must be at least one register to save, and can't save SP or PC. */
19723 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19724 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19725 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19727 /* Create sequence for DWARF info. All the frame-related data for
19728 debugging is held in this wrapper. */
19729 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19731 /* Describe the stack adjustment. */
19732 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19733 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19734 RTX_FRAME_RELATED_P (tmp
) = 1;
19735 XVECEXP (dwarf
, 0, 0) = tmp
;
19737 /* Find the first register. */
19738 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19743 /* If there's an odd number of registers to push. Start off by
19744 pushing a single register. This ensures that subsequent strd
19745 operations are dword aligned (assuming that SP was originally
19746 64-bit aligned). */
19747 if ((num_regs
& 1) != 0)
19749 rtx reg
, mem
, insn
;
19751 reg
= gen_rtx_REG (SImode
, regno
);
19753 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19754 stack_pointer_rtx
));
19756 mem
= gen_frame_mem (Pmode
,
19758 (Pmode
, stack_pointer_rtx
,
19759 plus_constant (Pmode
, stack_pointer_rtx
,
19762 tmp
= gen_rtx_SET (mem
, reg
);
19763 RTX_FRAME_RELATED_P (tmp
) = 1;
19764 insn
= emit_insn (tmp
);
19765 RTX_FRAME_RELATED_P (insn
) = 1;
19766 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19767 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19768 RTX_FRAME_RELATED_P (tmp
) = 1;
19771 XVECEXP (dwarf
, 0, i
) = tmp
;
19775 while (i
< num_regs
)
19776 if (saved_regs_mask
& (1 << regno
))
19778 rtx reg1
, reg2
, mem1
, mem2
;
19779 rtx tmp0
, tmp1
, tmp2
;
19782 /* Find the register to pair with this one. */
19783 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19787 reg1
= gen_rtx_REG (SImode
, regno
);
19788 reg2
= gen_rtx_REG (SImode
, regno2
);
19795 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19798 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19800 -4 * (num_regs
- 1)));
19801 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19802 plus_constant (Pmode
, stack_pointer_rtx
,
19804 tmp1
= gen_rtx_SET (mem1
, reg1
);
19805 tmp2
= gen_rtx_SET (mem2
, reg2
);
19806 RTX_FRAME_RELATED_P (tmp0
) = 1;
19807 RTX_FRAME_RELATED_P (tmp1
) = 1;
19808 RTX_FRAME_RELATED_P (tmp2
) = 1;
19809 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19810 XVECEXP (par
, 0, 0) = tmp0
;
19811 XVECEXP (par
, 0, 1) = tmp1
;
19812 XVECEXP (par
, 0, 2) = tmp2
;
19813 insn
= emit_insn (par
);
19814 RTX_FRAME_RELATED_P (insn
) = 1;
19815 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19819 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19822 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19825 tmp1
= gen_rtx_SET (mem1
, reg1
);
19826 tmp2
= gen_rtx_SET (mem2
, reg2
);
19827 RTX_FRAME_RELATED_P (tmp1
) = 1;
19828 RTX_FRAME_RELATED_P (tmp2
) = 1;
19829 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19830 XVECEXP (par
, 0, 0) = tmp1
;
19831 XVECEXP (par
, 0, 1) = tmp2
;
19835 /* Create unwind information. This is an approximation. */
19836 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
19837 plus_constant (Pmode
,
19841 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
19842 plus_constant (Pmode
,
19847 RTX_FRAME_RELATED_P (tmp1
) = 1;
19848 RTX_FRAME_RELATED_P (tmp2
) = 1;
19849 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19850 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19852 regno
= regno2
+ 1;
19860 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19861 whenever possible, otherwise it emits single-word stores. The first store
19862 also allocates stack space for all saved registers, using writeback with
19863 post-addressing mode. All other stores use offset addressing. If no STRD
19864 can be emitted, this function emits a sequence of single-word stores,
19865 and not an STM as before, because single-word stores provide more freedom
19866 scheduling and can be turned into an STM by peephole optimizations. */
19868 arm_emit_strd_push (unsigned long saved_regs_mask
)
19871 int i
, j
, dwarf_index
= 0;
19873 rtx dwarf
= NULL_RTX
;
19874 rtx insn
= NULL_RTX
;
19877 /* TODO: A more efficient code can be emitted by changing the
19878 layout, e.g., first push all pairs that can use STRD to keep the
19879 stack aligned, and then push all other registers. */
19880 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19881 if (saved_regs_mask
& (1 << i
))
19884 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19885 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19886 gcc_assert (num_regs
> 0);
19888 /* Create sequence for DWARF info. */
19889 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19891 /* For dwarf info, we generate explicit stack update. */
19892 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19893 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19894 RTX_FRAME_RELATED_P (tmp
) = 1;
19895 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19897 /* Save registers. */
19898 offset
= - 4 * num_regs
;
19900 while (j
<= LAST_ARM_REGNUM
)
19901 if (saved_regs_mask
& (1 << j
))
19904 && (saved_regs_mask
& (1 << (j
+ 1))))
19906 /* Current register and previous register form register pair for
19907 which STRD can be generated. */
19910 /* Allocate stack space for all saved registers. */
19911 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19912 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19913 mem
= gen_frame_mem (DImode
, tmp
);
19916 else if (offset
> 0)
19917 mem
= gen_frame_mem (DImode
,
19918 plus_constant (Pmode
,
19922 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19924 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
19925 RTX_FRAME_RELATED_P (tmp
) = 1;
19926 tmp
= emit_insn (tmp
);
19928 /* Record the first store insn. */
19929 if (dwarf_index
== 1)
19932 /* Generate dwarf info. */
19933 mem
= gen_frame_mem (SImode
,
19934 plus_constant (Pmode
,
19937 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19938 RTX_FRAME_RELATED_P (tmp
) = 1;
19939 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19941 mem
= gen_frame_mem (SImode
,
19942 plus_constant (Pmode
,
19945 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
19946 RTX_FRAME_RELATED_P (tmp
) = 1;
19947 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19954 /* Emit a single word store. */
19957 /* Allocate stack space for all saved registers. */
19958 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19959 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19960 mem
= gen_frame_mem (SImode
, tmp
);
19963 else if (offset
> 0)
19964 mem
= gen_frame_mem (SImode
,
19965 plus_constant (Pmode
,
19969 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19971 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19972 RTX_FRAME_RELATED_P (tmp
) = 1;
19973 tmp
= emit_insn (tmp
);
19975 /* Record the first store insn. */
19976 if (dwarf_index
== 1)
19979 /* Generate dwarf info. */
19980 mem
= gen_frame_mem (SImode
,
19981 plus_constant(Pmode
,
19984 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19985 RTX_FRAME_RELATED_P (tmp
) = 1;
19986 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19995 /* Attach dwarf info to the first insn we generate. */
19996 gcc_assert (insn
!= NULL_RTX
);
19997 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19998 RTX_FRAME_RELATED_P (insn
) = 1;
20001 /* Generate and emit an insn that we will recognize as a push_multi.
20002 Unfortunately, since this insn does not reflect very well the actual
20003 semantics of the operation, we need to annotate the insn for the benefit
20004 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20005 MASK for registers that should be annotated for DWARF2 frame unwind
20008 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20011 int num_dwarf_regs
= 0;
20015 int dwarf_par_index
;
20018 /* We don't record the PC in the dwarf frame information. */
20019 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20021 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20023 if (mask
& (1 << i
))
20025 if (dwarf_regs_mask
& (1 << i
))
20029 gcc_assert (num_regs
&& num_regs
<= 16);
20030 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20032 /* For the body of the insn we are going to generate an UNSPEC in
20033 parallel with several USEs. This allows the insn to be recognized
20034 by the push_multi pattern in the arm.md file.
20036 The body of the insn looks something like this:
20039 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20040 (const_int:SI <num>)))
20041 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20047 For the frame note however, we try to be more explicit and actually
20048 show each register being stored into the stack frame, plus a (single)
20049 decrement of the stack pointer. We do it this way in order to be
20050 friendly to the stack unwinding code, which only wants to see a single
20051 stack decrement per instruction. The RTL we generate for the note looks
20052 something like this:
20055 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20056 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20057 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20058 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20062 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20063 instead we'd have a parallel expression detailing all
20064 the stores to the various memory addresses so that debug
20065 information is more up-to-date. Remember however while writing
20066 this to take care of the constraints with the push instruction.
20068 Note also that this has to be taken care of for the VFP registers.
20070 For more see PR43399. */
20072 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20073 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20074 dwarf_par_index
= 1;
20076 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20078 if (mask
& (1 << i
))
20080 reg
= gen_rtx_REG (SImode
, i
);
20082 XVECEXP (par
, 0, 0)
20083 = gen_rtx_SET (gen_frame_mem
20085 gen_rtx_PRE_MODIFY (Pmode
,
20088 (Pmode
, stack_pointer_rtx
,
20091 gen_rtx_UNSPEC (BLKmode
,
20092 gen_rtvec (1, reg
),
20093 UNSPEC_PUSH_MULT
));
20095 if (dwarf_regs_mask
& (1 << i
))
20097 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20099 RTX_FRAME_RELATED_P (tmp
) = 1;
20100 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20107 for (j
= 1, i
++; j
< num_regs
; i
++)
20109 if (mask
& (1 << i
))
20111 reg
= gen_rtx_REG (SImode
, i
);
20113 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20115 if (dwarf_regs_mask
& (1 << i
))
20118 = gen_rtx_SET (gen_frame_mem
20120 plus_constant (Pmode
, stack_pointer_rtx
,
20123 RTX_FRAME_RELATED_P (tmp
) = 1;
20124 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20131 par
= emit_insn (par
);
20133 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20134 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20135 RTX_FRAME_RELATED_P (tmp
) = 1;
20136 XVECEXP (dwarf
, 0, 0) = tmp
;
20138 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20143 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20144 SIZE is the offset to be adjusted.
20145 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20147 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20151 RTX_FRAME_RELATED_P (insn
) = 1;
20152 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20153 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20156 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20157 SAVED_REGS_MASK shows which registers need to be restored.
20159 Unfortunately, since this insn does not reflect very well the actual
20160 semantics of the operation, we need to annotate the insn for the benefit
20161 of DWARF2 frame unwind information. */
20163 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20168 rtx dwarf
= NULL_RTX
;
20170 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20174 offset_adj
= return_in_pc
? 1 : 0;
20175 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20176 if (saved_regs_mask
& (1 << i
))
20179 gcc_assert (num_regs
&& num_regs
<= 16);
20181 /* If SP is in reglist, then we don't emit SP update insn. */
20182 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20184 /* The parallel needs to hold num_regs SETs
20185 and one SET for the stack update. */
20186 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20189 XVECEXP (par
, 0, 0) = ret_rtx
;
20193 /* Increment the stack pointer, based on there being
20194 num_regs 4-byte registers to restore. */
20195 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20196 plus_constant (Pmode
,
20199 RTX_FRAME_RELATED_P (tmp
) = 1;
20200 XVECEXP (par
, 0, offset_adj
) = tmp
;
20203 /* Now restore every reg, which may include PC. */
20204 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20205 if (saved_regs_mask
& (1 << i
))
20207 reg
= gen_rtx_REG (SImode
, i
);
20208 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20210 /* Emit single load with writeback. */
20211 tmp
= gen_frame_mem (SImode
,
20212 gen_rtx_POST_INC (Pmode
,
20213 stack_pointer_rtx
));
20214 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20215 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20219 tmp
= gen_rtx_SET (reg
,
20222 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20223 RTX_FRAME_RELATED_P (tmp
) = 1;
20224 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20226 /* We need to maintain a sequence for DWARF info too. As dwarf info
20227 should not have PC, skip PC. */
20228 if (i
!= PC_REGNUM
)
20229 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20235 par
= emit_jump_insn (par
);
20237 par
= emit_insn (par
);
20239 REG_NOTES (par
) = dwarf
;
20241 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20242 stack_pointer_rtx
, stack_pointer_rtx
);
20245 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20246 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20248 Unfortunately, since this insn does not reflect very well the actual
20249 semantics of the operation, we need to annotate the insn for the benefit
20250 of DWARF2 frame unwind information. */
20252 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20256 rtx dwarf
= NULL_RTX
;
20259 gcc_assert (num_regs
&& num_regs
<= 32);
20261 /* Workaround ARM10 VFPr1 bug. */
20262 if (num_regs
== 2 && !arm_arch6
)
20264 if (first_reg
== 15)
20270 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20271 there could be up to 32 D-registers to restore.
20272 If there are more than 16 D-registers, make two recursive calls,
20273 each of which emits one pop_multi instruction. */
20276 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20277 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20281 /* The parallel needs to hold num_regs SETs
20282 and one SET for the stack update. */
20283 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20285 /* Increment the stack pointer, based on there being
20286 num_regs 8-byte registers to restore. */
20287 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20288 RTX_FRAME_RELATED_P (tmp
) = 1;
20289 XVECEXP (par
, 0, 0) = tmp
;
20291 /* Now show every reg that will be restored, using a SET for each. */
20292 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20294 reg
= gen_rtx_REG (DFmode
, i
);
20296 tmp
= gen_rtx_SET (reg
,
20299 plus_constant (Pmode
, base_reg
, 8 * j
)));
20300 RTX_FRAME_RELATED_P (tmp
) = 1;
20301 XVECEXP (par
, 0, j
+ 1) = tmp
;
20303 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20308 par
= emit_insn (par
);
20309 REG_NOTES (par
) = dwarf
;
20311 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20312 if (REGNO (base_reg
) == IP_REGNUM
)
20314 RTX_FRAME_RELATED_P (par
) = 1;
20315 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20318 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20319 base_reg
, base_reg
);
20322 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20323 number of registers are being popped, multiple LDRD patterns are created for
20324 all register pairs. If odd number of registers are popped, last register is
20325 loaded by using LDR pattern. */
20327 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20331 rtx par
= NULL_RTX
;
20332 rtx dwarf
= NULL_RTX
;
20333 rtx tmp
, reg
, tmp1
;
20334 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20336 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20337 if (saved_regs_mask
& (1 << i
))
20340 gcc_assert (num_regs
&& num_regs
<= 16);
20342 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20343 to be popped. So, if num_regs is even, now it will become odd,
20344 and we can generate pop with PC. If num_regs is odd, it will be
20345 even now, and ldr with return can be generated for PC. */
20349 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20351 /* Var j iterates over all the registers to gather all the registers in
20352 saved_regs_mask. Var i gives index of saved registers in stack frame.
20353 A PARALLEL RTX of register-pair is created here, so that pattern for
20354 LDRD can be matched. As PC is always last register to be popped, and
20355 we have already decremented num_regs if PC, we don't have to worry
20356 about PC in this loop. */
20357 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20358 if (saved_regs_mask
& (1 << j
))
20360 /* Create RTX for memory load. */
20361 reg
= gen_rtx_REG (SImode
, j
);
20362 tmp
= gen_rtx_SET (reg
,
20363 gen_frame_mem (SImode
,
20364 plus_constant (Pmode
,
20365 stack_pointer_rtx
, 4 * i
)));
20366 RTX_FRAME_RELATED_P (tmp
) = 1;
20370 /* When saved-register index (i) is even, the RTX to be emitted is
20371 yet to be created. Hence create it first. The LDRD pattern we
20372 are generating is :
20373 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20374 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20375 where target registers need not be consecutive. */
20376 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20380 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20381 added as 0th element and if i is odd, reg_i is added as 1st element
20382 of LDRD pattern shown above. */
20383 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20384 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20388 /* When saved-register index (i) is odd, RTXs for both the registers
20389 to be loaded are generated in above given LDRD pattern, and the
20390 pattern can be emitted now. */
20391 par
= emit_insn (par
);
20392 REG_NOTES (par
) = dwarf
;
20393 RTX_FRAME_RELATED_P (par
) = 1;
20399 /* If the number of registers pushed is odd AND return_in_pc is false OR
20400 number of registers are even AND return_in_pc is true, last register is
20401 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20402 then LDR with post increment. */
20404 /* Increment the stack pointer, based on there being
20405 num_regs 4-byte registers to restore. */
20406 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20407 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20408 RTX_FRAME_RELATED_P (tmp
) = 1;
20409 tmp
= emit_insn (tmp
);
20412 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20413 stack_pointer_rtx
, stack_pointer_rtx
);
20418 if (((num_regs
% 2) == 1 && !return_in_pc
)
20419 || ((num_regs
% 2) == 0 && return_in_pc
))
20421 /* Scan for the single register to be popped. Skip until the saved
20422 register is found. */
20423 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20425 /* Gen LDR with post increment here. */
20426 tmp1
= gen_rtx_MEM (SImode
,
20427 gen_rtx_POST_INC (SImode
,
20428 stack_pointer_rtx
));
20429 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20431 reg
= gen_rtx_REG (SImode
, j
);
20432 tmp
= gen_rtx_SET (reg
, tmp1
);
20433 RTX_FRAME_RELATED_P (tmp
) = 1;
20434 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20438 /* If return_in_pc, j must be PC_REGNUM. */
20439 gcc_assert (j
== PC_REGNUM
);
20440 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20441 XVECEXP (par
, 0, 0) = ret_rtx
;
20442 XVECEXP (par
, 0, 1) = tmp
;
20443 par
= emit_jump_insn (par
);
20447 par
= emit_insn (tmp
);
20448 REG_NOTES (par
) = dwarf
;
20449 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20450 stack_pointer_rtx
, stack_pointer_rtx
);
20454 else if ((num_regs
% 2) == 1 && return_in_pc
)
20456 /* There are 2 registers to be popped. So, generate the pattern
20457 pop_multiple_with_stack_update_and_return to pop in PC. */
20458 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20464 /* LDRD in ARM mode needs consecutive registers as operands. This function
20465 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20466 offset addressing and then generates one separate stack udpate. This provides
20467 more scheduling freedom, compared to writeback on every load. However,
20468 if the function returns using load into PC directly
20469 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20470 before the last load. TODO: Add a peephole optimization to recognize
20471 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20472 peephole optimization to merge the load at stack-offset zero
20473 with the stack update instruction using load with writeback
20474 in post-index addressing mode. */
20476 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20480 rtx par
= NULL_RTX
;
20481 rtx dwarf
= NULL_RTX
;
20484 /* Restore saved registers. */
20485 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20487 while (j
<= LAST_ARM_REGNUM
)
20488 if (saved_regs_mask
& (1 << j
))
20491 && (saved_regs_mask
& (1 << (j
+ 1)))
20492 && (j
+ 1) != PC_REGNUM
)
20494 /* Current register and next register form register pair for which
20495 LDRD can be generated. PC is always the last register popped, and
20496 we handle it separately. */
20498 mem
= gen_frame_mem (DImode
,
20499 plus_constant (Pmode
,
20503 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20505 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20506 tmp
= emit_insn (tmp
);
20507 RTX_FRAME_RELATED_P (tmp
) = 1;
20509 /* Generate dwarf info. */
20511 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20512 gen_rtx_REG (SImode
, j
),
20514 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20515 gen_rtx_REG (SImode
, j
+ 1),
20518 REG_NOTES (tmp
) = dwarf
;
20523 else if (j
!= PC_REGNUM
)
20525 /* Emit a single word load. */
20527 mem
= gen_frame_mem (SImode
,
20528 plus_constant (Pmode
,
20532 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20534 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20535 tmp
= emit_insn (tmp
);
20536 RTX_FRAME_RELATED_P (tmp
) = 1;
20538 /* Generate dwarf info. */
20539 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20540 gen_rtx_REG (SImode
, j
),
20546 else /* j == PC_REGNUM */
20552 /* Update the stack. */
20555 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20556 plus_constant (Pmode
,
20559 tmp
= emit_insn (tmp
);
20560 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20561 stack_pointer_rtx
, stack_pointer_rtx
);
20565 if (saved_regs_mask
& (1 << PC_REGNUM
))
20567 /* Only PC is to be popped. */
20568 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20569 XVECEXP (par
, 0, 0) = ret_rtx
;
20570 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20571 gen_frame_mem (SImode
,
20572 gen_rtx_POST_INC (SImode
,
20573 stack_pointer_rtx
)));
20574 RTX_FRAME_RELATED_P (tmp
) = 1;
20575 XVECEXP (par
, 0, 1) = tmp
;
20576 par
= emit_jump_insn (par
);
20578 /* Generate dwarf info. */
20579 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20580 gen_rtx_REG (SImode
, PC_REGNUM
),
20582 REG_NOTES (par
) = dwarf
;
20583 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20584 stack_pointer_rtx
, stack_pointer_rtx
);
20588 /* Calculate the size of the return value that is passed in registers. */
20590 arm_size_return_regs (void)
20594 if (crtl
->return_rtx
!= 0)
20595 mode
= GET_MODE (crtl
->return_rtx
);
20597 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20599 return GET_MODE_SIZE (mode
);
20602 /* Return true if the current function needs to save/restore LR. */
20604 thumb_force_lr_save (void)
20606 return !cfun
->machine
->lr_save_eliminated
20608 || thumb_far_jump_used_p ()
20609 || df_regs_ever_live_p (LR_REGNUM
));
20612 /* We do not know if r3 will be available because
20613 we do have an indirect tailcall happening in this
20614 particular case. */
20616 is_indirect_tailcall_p (rtx call
)
20618 rtx pat
= PATTERN (call
);
20620 /* Indirect tail call. */
20621 pat
= XVECEXP (pat
, 0, 0);
20622 if (GET_CODE (pat
) == SET
)
20623 pat
= SET_SRC (pat
);
20625 pat
= XEXP (XEXP (pat
, 0), 0);
20626 return REG_P (pat
);
20629 /* Return true if r3 is used by any of the tail call insns in the
20630 current function. */
20632 any_sibcall_could_use_r3 (void)
20637 if (!crtl
->tail_call_emit
)
20639 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20640 if (e
->flags
& EDGE_SIBCALL
)
20642 rtx_insn
*call
= BB_END (e
->src
);
20643 if (!CALL_P (call
))
20644 call
= prev_nonnote_nondebug_insn (call
);
20645 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20646 if (find_regno_fusage (call
, USE
, 3)
20647 || is_indirect_tailcall_p (call
))
20654 /* Compute the distance from register FROM to register TO.
20655 These can be the arg pointer (26), the soft frame pointer (25),
20656 the stack pointer (13) or the hard frame pointer (11).
20657 In thumb mode r7 is used as the soft frame pointer, if needed.
20658 Typical stack layout looks like this:
20660 old stack pointer -> | |
20663 | | saved arguments for
20664 | | vararg functions
20667 hard FP & arg pointer -> | | \
20675 soft frame pointer -> | | /
20680 locals base pointer -> | | /
20685 current stack pointer -> | | /
20688 For a given function some or all of these stack components
20689 may not be needed, giving rise to the possibility of
20690 eliminating some of the registers.
20692 The values returned by this function must reflect the behavior
20693 of arm_expand_prologue() and arm_compute_save_reg_mask().
20695 The sign of the number returned reflects the direction of stack
20696 growth, so the values are positive for all eliminations except
20697 from the soft frame pointer to the hard frame pointer.
20699 SFP may point just inside the local variables block to ensure correct
20703 /* Calculate stack offsets. These are used to calculate register elimination
20704 offsets and in prologue/epilogue code. Also calculates which registers
20705 should be saved. */
20707 static arm_stack_offsets
*
20708 arm_get_frame_offsets (void)
20710 struct arm_stack_offsets
*offsets
;
20711 unsigned long func_type
;
20714 HOST_WIDE_INT frame_size
;
20717 offsets
= &cfun
->machine
->stack_offsets
;
20719 if (reload_completed
)
20722 /* Initially this is the size of the local variables. It will translated
20723 into an offset once we have determined the size of preceding data. */
20724 frame_size
= ROUND_UP_WORD (get_frame_size ());
20726 /* Space for variadic functions. */
20727 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20729 /* In Thumb mode this is incorrect, but never used. */
20731 = (offsets
->saved_args
20732 + arm_compute_static_chain_stack_bytes ()
20733 + (frame_pointer_needed
? 4 : 0));
20737 unsigned int regno
;
20739 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20740 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20741 saved
= core_saved
;
20743 /* We know that SP will be doubleword aligned on entry, and we must
20744 preserve that condition at any subroutine call. We also require the
20745 soft frame pointer to be doubleword aligned. */
20747 if (TARGET_REALLY_IWMMXT
)
20749 /* Check for the call-saved iWMMXt registers. */
20750 for (regno
= FIRST_IWMMXT_REGNUM
;
20751 regno
<= LAST_IWMMXT_REGNUM
;
20753 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20757 func_type
= arm_current_func_type ();
20758 /* Space for saved VFP registers. */
20759 if (! IS_VOLATILE (func_type
)
20760 && TARGET_HARD_FLOAT
)
20761 saved
+= arm_get_vfp_saved_size ();
20763 else /* TARGET_THUMB1 */
20765 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20766 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20767 saved
= core_saved
;
20768 if (TARGET_BACKTRACE
)
20772 /* Saved registers include the stack frame. */
20773 offsets
->saved_regs
20774 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20775 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20777 /* A leaf function does not need any stack alignment if it has nothing
20779 if (crtl
->is_leaf
&& frame_size
== 0
20780 /* However if it calls alloca(), we have a dynamically allocated
20781 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20782 && ! cfun
->calls_alloca
)
20784 offsets
->outgoing_args
= offsets
->soft_frame
;
20785 offsets
->locals_base
= offsets
->soft_frame
;
20789 /* Ensure SFP has the correct alignment. */
20790 if (ARM_DOUBLEWORD_ALIGN
20791 && (offsets
->soft_frame
& 7))
20793 offsets
->soft_frame
+= 4;
20794 /* Try to align stack by pushing an extra reg. Don't bother doing this
20795 when there is a stack frame as the alignment will be rolled into
20796 the normal stack adjustment. */
20797 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20801 /* Register r3 is caller-saved. Normally it does not need to be
20802 saved on entry by the prologue. However if we choose to save
20803 it for padding then we may confuse the compiler into thinking
20804 a prologue sequence is required when in fact it is not. This
20805 will occur when shrink-wrapping if r3 is used as a scratch
20806 register and there are no other callee-saved writes.
20808 This situation can be avoided when other callee-saved registers
20809 are available and r3 is not mandatory if we choose a callee-saved
20810 register for padding. */
20811 bool prefer_callee_reg_p
= false;
20813 /* If it is safe to use r3, then do so. This sometimes
20814 generates better code on Thumb-2 by avoiding the need to
20815 use 32-bit push/pop instructions. */
20816 if (! any_sibcall_could_use_r3 ()
20817 && arm_size_return_regs () <= 12
20818 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20820 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20823 if (!TARGET_THUMB2
)
20824 prefer_callee_reg_p
= true;
20827 || prefer_callee_reg_p
)
20829 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20831 /* Avoid fixed registers; they may be changed at
20832 arbitrary times so it's unsafe to restore them
20833 during the epilogue. */
20835 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20845 offsets
->saved_regs
+= 4;
20846 offsets
->saved_regs_mask
|= (1 << reg
);
20851 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20852 offsets
->outgoing_args
= (offsets
->locals_base
20853 + crtl
->outgoing_args_size
);
20855 if (ARM_DOUBLEWORD_ALIGN
)
20857 /* Ensure SP remains doubleword aligned. */
20858 if (offsets
->outgoing_args
& 7)
20859 offsets
->outgoing_args
+= 4;
20860 gcc_assert (!(offsets
->outgoing_args
& 7));
20867 /* Calculate the relative offsets for the different stack pointers. Positive
20868 offsets are in the direction of stack growth. */
20871 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20873 arm_stack_offsets
*offsets
;
20875 offsets
= arm_get_frame_offsets ();
20877 /* OK, now we have enough information to compute the distances.
20878 There must be an entry in these switch tables for each pair
20879 of registers in ELIMINABLE_REGS, even if some of the entries
20880 seem to be redundant or useless. */
20883 case ARG_POINTER_REGNUM
:
20886 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20889 case FRAME_POINTER_REGNUM
:
20890 /* This is the reverse of the soft frame pointer
20891 to hard frame pointer elimination below. */
20892 return offsets
->soft_frame
- offsets
->saved_args
;
20894 case ARM_HARD_FRAME_POINTER_REGNUM
:
20895 /* This is only non-zero in the case where the static chain register
20896 is stored above the frame. */
20897 return offsets
->frame
- offsets
->saved_args
- 4;
20899 case STACK_POINTER_REGNUM
:
20900 /* If nothing has been pushed on the stack at all
20901 then this will return -4. This *is* correct! */
20902 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20905 gcc_unreachable ();
20907 gcc_unreachable ();
20909 case FRAME_POINTER_REGNUM
:
20912 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20915 case ARM_HARD_FRAME_POINTER_REGNUM
:
20916 /* The hard frame pointer points to the top entry in the
20917 stack frame. The soft frame pointer to the bottom entry
20918 in the stack frame. If there is no stack frame at all,
20919 then they are identical. */
20921 return offsets
->frame
- offsets
->soft_frame
;
20923 case STACK_POINTER_REGNUM
:
20924 return offsets
->outgoing_args
- offsets
->soft_frame
;
20927 gcc_unreachable ();
20929 gcc_unreachable ();
20932 /* You cannot eliminate from the stack pointer.
20933 In theory you could eliminate from the hard frame
20934 pointer to the stack pointer, but this will never
20935 happen, since if a stack frame is not needed the
20936 hard frame pointer will never be used. */
20937 gcc_unreachable ();
20941 /* Given FROM and TO register numbers, say whether this elimination is
20942 allowed. Frame pointer elimination is automatically handled.
20944 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20945 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20946 pointer, we must eliminate FRAME_POINTER_REGNUM into
20947 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20948 ARG_POINTER_REGNUM. */
20951 arm_can_eliminate (const int from
, const int to
)
20953 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20954 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20955 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20956 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20960 /* Emit RTL to save coprocessor registers on function entry. Returns the
20961 number of bytes pushed. */
20964 arm_save_coproc_regs(void)
20966 int saved_size
= 0;
20968 unsigned start_reg
;
20971 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20972 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20974 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20975 insn
= gen_rtx_MEM (V2SImode
, insn
);
20976 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20977 RTX_FRAME_RELATED_P (insn
) = 1;
20981 if (TARGET_HARD_FLOAT
)
20983 start_reg
= FIRST_VFP_REGNUM
;
20985 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20987 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20988 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20990 if (start_reg
!= reg
)
20991 saved_size
+= vfp_emit_fstmd (start_reg
,
20992 (reg
- start_reg
) / 2);
20993 start_reg
= reg
+ 2;
20996 if (start_reg
!= reg
)
20997 saved_size
+= vfp_emit_fstmd (start_reg
,
20998 (reg
- start_reg
) / 2);
21004 /* Set the Thumb frame pointer from the stack pointer. */
21007 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21009 HOST_WIDE_INT amount
;
21012 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21014 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21015 stack_pointer_rtx
, GEN_INT (amount
)));
21018 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21019 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21020 expects the first two operands to be the same. */
21023 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21025 hard_frame_pointer_rtx
));
21029 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21030 hard_frame_pointer_rtx
,
21031 stack_pointer_rtx
));
21033 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21034 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21035 RTX_FRAME_RELATED_P (dwarf
) = 1;
21036 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21039 RTX_FRAME_RELATED_P (insn
) = 1;
21042 struct scratch_reg
{
21047 /* Return a short-lived scratch register for use as a 2nd scratch register on
21048 function entry after the registers are saved in the prologue. This register
21049 must be released by means of release_scratch_register_on_entry. IP is not
21050 considered since it is always used as the 1st scratch register if available.
21052 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21053 mask of live registers. */
21056 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21057 unsigned long live_regs
)
21063 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21069 for (i
= 4; i
< 11; i
++)
21070 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21078 /* If IP is used as the 1st scratch register for a nested function,
21079 then either r3 wasn't available or is used to preserve IP. */
21080 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21082 regno
= (regno1
== 3 ? 2 : 3);
21084 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21089 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21092 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21093 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21094 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21095 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21096 RTX_FRAME_RELATED_P (insn
) = 1;
21097 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21101 /* Release a scratch register obtained from the preceding function. */
21104 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21108 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21109 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21110 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21111 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21112 RTX_FRAME_RELATED_P (insn
) = 1;
21113 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
/* Interval between consecutive stack probes, in bytes.  */
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* The probe insns below use str with an immediate offset, which is limited
   to 4095 on ARM.  */
#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif
21123 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21124 inclusive. These are offsets from the current stack pointer. REGNO1
21125 is the index number of the 1st scratch register and LIVE_REGS is the
21126 mask of live registers. */
21129 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21130 unsigned int regno1
, unsigned long live_regs
)
21132 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21134 /* See if we have a constant small number of probes to generate. If so,
21135 that's the easy case. */
21136 if (size
<= PROBE_INTERVAL
)
21138 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21139 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21140 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21143 /* The run-time loop is made up of 10 insns in the generic case while the
21144 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21145 else if (size
<= 5 * PROBE_INTERVAL
)
21147 HOST_WIDE_INT i
, rem
;
21149 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21150 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21151 emit_stack_probe (reg1
);
21153 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21154 it exceeds SIZE. If only two probes are needed, this will not
21155 generate any code. Then probe at FIRST + SIZE. */
21156 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21158 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21159 emit_stack_probe (reg1
);
21162 rem
= size
- (i
- PROBE_INTERVAL
);
21163 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21165 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21166 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21169 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21172 /* Otherwise, do the same as above, but in a loop. Note that we must be
21173 extra careful with variables wrapping around because we might be at
21174 the very top (or the very bottom) of the address space and we have
21175 to be able to handle this case properly; in particular, we use an
21176 equality test for the loop condition. */
21179 HOST_WIDE_INT rounded_size
;
21180 struct scratch_reg sr
;
21182 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21184 emit_move_insn (reg1
, GEN_INT (first
));
21187 /* Step 1: round SIZE to the previous multiple of the interval. */
21189 rounded_size
= size
& -PROBE_INTERVAL
;
21190 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21193 /* Step 2: compute initial and final value of the loop counter. */
21195 /* TEST_ADDR = SP + FIRST. */
21196 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21198 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21199 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21202 /* Step 3: the loop
21206 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21209 while (TEST_ADDR != LAST_ADDR)
21211 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21212 until it is equal to ROUNDED_SIZE. */
21214 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21217 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21218 that SIZE is equal to ROUNDED_SIZE. */
21220 if (size
!= rounded_size
)
21222 HOST_WIDE_INT rem
= size
- rounded_size
;
21224 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21226 emit_set_insn (sr
.reg
,
21227 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21228 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21229 PROBE_INTERVAL
- rem
));
21232 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21235 release_scratch_register_on_entry (&sr
);
21238 /* Make sure nothing is scheduled before we are done. */
21239 emit_insn (gen_blockage ());
21242 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21243 absolute addresses. */
21246 output_probe_stack_range (rtx reg1
, rtx reg2
)
21248 static int labelno
= 0;
21252 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21255 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21257 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21259 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21260 output_asm_insn ("sub\t%0, %0, %1", xops
);
21262 /* Probe at TEST_ADDR. */
21263 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21265 /* Test if TEST_ADDR == LAST_ADDR. */
21267 output_asm_insn ("cmp\t%0, %1", xops
);
21270 fputs ("\tbne\t", asm_out_file
);
21271 assemble_name_raw (asm_out_file
, loop_lab
);
21272 fputc ('\n', asm_out_file
);
21277 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21280 arm_expand_prologue (void)
21285 unsigned long live_regs_mask
;
21286 unsigned long func_type
;
21288 int saved_pretend_args
= 0;
21289 int saved_regs
= 0;
21290 unsigned HOST_WIDE_INT args_to_push
;
21291 HOST_WIDE_INT size
;
21292 arm_stack_offsets
*offsets
;
21295 func_type
= arm_current_func_type ();
21297 /* Naked functions don't have prologues. */
21298 if (IS_NAKED (func_type
))
21300 if (flag_stack_usage_info
)
21301 current_function_static_stack_size
= 0;
21305 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21306 args_to_push
= crtl
->args
.pretend_args_size
;
21308 /* Compute which register we will have to save onto the stack. */
21309 offsets
= arm_get_frame_offsets ();
21310 live_regs_mask
= offsets
->saved_regs_mask
;
21312 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21314 if (IS_STACKALIGN (func_type
))
21318 /* Handle a word-aligned stack pointer. We generate the following:
21323 <save and restore r0 in normal prologue/epilogue>
21327 The unwinder doesn't need to know about the stack realignment.
21328 Just tell it we saved SP in r0. */
21329 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21331 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21332 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21334 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21335 RTX_FRAME_RELATED_P (insn
) = 1;
21336 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21338 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21340 /* ??? The CFA changes here, which may cause GDB to conclude that it
21341 has entered a different function. That said, the unwind info is
21342 correct, individually, before and after this instruction because
21343 we've described the save of SP, which will override the default
21344 handling of SP as restoring from the CFA. */
21345 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21348 /* The static chain register is the same as the IP register. If it is
21349 clobbered when creating the frame, we need to save and restore it. */
21350 clobber_ip
= IS_NESTED (func_type
)
21351 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21352 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21353 && !df_regs_ever_live_p (LR_REGNUM
)
21354 && arm_r3_live_at_start_p ()));
21356 /* Find somewhere to store IP whilst the frame is being created.
21357 We try the following places in order:
21359 1. The last argument register r3 if it is available.
21360 2. A slot on the stack above the frame if there are no
21361 arguments to push onto the stack.
21362 3. Register r3 again, after pushing the argument registers
21363 onto the stack, if this is a varargs function.
21364 4. The last slot on the stack created for the arguments to
21365 push, if this isn't a varargs function.
21367 Note - we only need to tell the dwarf2 backend about the SP
21368 adjustment in the second variant; the static chain register
21369 doesn't need to be unwound, as it doesn't contain a value
21370 inherited from the caller. */
21373 if (!arm_r3_live_at_start_p ())
21374 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21375 else if (args_to_push
== 0)
21379 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21382 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21383 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21386 /* Just tell the dwarf backend that we adjusted SP. */
21387 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21388 plus_constant (Pmode
, stack_pointer_rtx
,
21390 RTX_FRAME_RELATED_P (insn
) = 1;
21391 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21395 /* Store the args on the stack. */
21396 if (cfun
->machine
->uses_anonymous_args
)
21398 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21399 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21400 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21401 saved_pretend_args
= 1;
21407 if (args_to_push
== 4)
21408 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21410 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21411 plus_constant (Pmode
,
21415 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21417 /* Just tell the dwarf backend that we adjusted SP. */
21418 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21419 plus_constant (Pmode
, stack_pointer_rtx
,
21421 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21424 RTX_FRAME_RELATED_P (insn
) = 1;
21425 fp_offset
= args_to_push
;
21430 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21432 if (IS_INTERRUPT (func_type
))
21434 /* Interrupt functions must not corrupt any registers.
21435 Creating a frame pointer however, corrupts the IP
21436 register, so we must push it first. */
21437 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21439 /* Do not set RTX_FRAME_RELATED_P on this insn.
21440 The dwarf stack unwinding code only wants to see one
21441 stack decrement per function, and this is not it. If
21442 this instruction is labeled as being part of the frame
21443 creation sequence then dwarf2out_frame_debug_expr will
21444 die when it encounters the assignment of IP to FP
21445 later on, since the use of SP here establishes SP as
21446 the CFA register and not IP.
21448 Anyway this instruction is not really part of the stack
21449 frame creation although it is part of the prologue. */
21452 insn
= emit_set_insn (ip_rtx
,
21453 plus_constant (Pmode
, stack_pointer_rtx
,
21455 RTX_FRAME_RELATED_P (insn
) = 1;
21460 /* Push the argument registers, or reserve space for them. */
21461 if (cfun
->machine
->uses_anonymous_args
)
21462 insn
= emit_multi_reg_push
21463 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21464 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21467 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21468 GEN_INT (- args_to_push
)));
21469 RTX_FRAME_RELATED_P (insn
) = 1;
21472 /* If this is an interrupt service routine, and the link register
21473 is going to be pushed, and we're not generating extra
21474 push of IP (needed when frame is needed and frame layout if apcs),
21475 subtracting four from LR now will mean that the function return
21476 can be done with a single instruction. */
21477 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21478 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21479 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21482 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21484 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21487 if (live_regs_mask
)
21489 unsigned long dwarf_regs_mask
= live_regs_mask
;
21491 saved_regs
+= bit_count (live_regs_mask
) * 4;
21492 if (optimize_size
&& !frame_pointer_needed
21493 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21495 /* If no coprocessor registers are being pushed and we don't have
21496 to worry about a frame pointer then push extra registers to
21497 create the stack frame. This is done is a way that does not
21498 alter the frame layout, so is independent of the epilogue. */
21502 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21504 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21505 if (frame
&& n
* 4 >= frame
)
21508 live_regs_mask
|= (1 << n
) - 1;
21509 saved_regs
+= frame
;
21514 && current_tune
->prefer_ldrd_strd
21515 && !optimize_function_for_size_p (cfun
))
21517 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21519 thumb2_emit_strd_push (live_regs_mask
);
21520 else if (TARGET_ARM
21521 && !TARGET_APCS_FRAME
21522 && !IS_INTERRUPT (func_type
))
21523 arm_emit_strd_push (live_regs_mask
);
21526 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21527 RTX_FRAME_RELATED_P (insn
) = 1;
21532 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21533 RTX_FRAME_RELATED_P (insn
) = 1;
21537 if (! IS_VOLATILE (func_type
))
21538 saved_regs
+= arm_save_coproc_regs ();
21540 if (frame_pointer_needed
&& TARGET_ARM
)
21542 /* Create the new frame pointer. */
21543 if (TARGET_APCS_FRAME
)
21545 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21546 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21547 RTX_FRAME_RELATED_P (insn
) = 1;
21551 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21552 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21553 stack_pointer_rtx
, insn
));
21554 RTX_FRAME_RELATED_P (insn
) = 1;
21558 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21559 if (flag_stack_usage_info
)
21560 current_function_static_stack_size
= size
;
21562 /* If this isn't an interrupt service routine and we have a frame, then do
21563 stack checking. We use IP as the first scratch register, except for the
21564 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21565 if (!IS_INTERRUPT (func_type
)
21566 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
21568 unsigned int regno
;
21570 if (!IS_NESTED (func_type
) || clobber_ip
)
21572 else if (df_regs_ever_live_p (LR_REGNUM
))
21577 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21579 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
21580 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
21581 size
- STACK_CHECK_PROTECT
,
21582 regno
, live_regs_mask
);
21585 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
21586 regno
, live_regs_mask
);
21589 /* Recover the static chain register. */
21592 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21593 insn
= gen_rtx_REG (SImode
, 3);
21596 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21597 insn
= gen_frame_mem (SImode
, insn
);
21599 emit_set_insn (ip_rtx
, insn
);
21600 emit_insn (gen_force_register_use (ip_rtx
));
21603 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21605 /* This add can produce multiple insns for a large constant, so we
21606 need to get tricky. */
21607 rtx_insn
*last
= get_last_insn ();
21609 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21610 - offsets
->outgoing_args
);
21612 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21616 last
= last
? NEXT_INSN (last
) : get_insns ();
21617 RTX_FRAME_RELATED_P (last
) = 1;
21619 while (last
!= insn
);
21621 /* If the frame pointer is needed, emit a special barrier that
21622 will prevent the scheduler from moving stores to the frame
21623 before the stack adjustment. */
21624 if (frame_pointer_needed
)
21625 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21626 hard_frame_pointer_rtx
));
21630 if (frame_pointer_needed
&& TARGET_THUMB2
)
21631 thumb_set_frame_pointer (offsets
);
21633 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21635 unsigned long mask
;
21637 mask
= live_regs_mask
;
21638 mask
&= THUMB2_WORK_REGS
;
21639 if (!IS_NESTED (func_type
))
21640 mask
|= (1 << IP_REGNUM
);
21641 arm_load_pic_register (mask
);
21644 /* If we are profiling, make sure no instructions are scheduled before
21645 the call to mcount. Similarly if the user has requested no
21646 scheduling in the prolog. Similarly if we want non-call exceptions
21647 using the EABI unwinder, to prevent faulting instructions from being
21648 swapped with a stack adjustment. */
21649 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21650 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21651 && cfun
->can_throw_non_call_exceptions
))
21652 emit_insn (gen_blockage ());
21654 /* If the link register is being kept alive, with the return address in it,
21655 then make sure that it does not get reused by the ce2 pass. */
21656 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21657 cfun
->machine
->lr_save_eliminated
= 1;
/* NOTE(review): this file is a damaged text extraction of GCC's
   gcc/config/arm/arm.c.  Interior lines are missing (the embedded
   original line numbers jump) and statements are split mid-token.
   Code below is preserved byte-for-byte; only comments are added.  */
 21660 /* Print condition code to STREAM. Helper function for arm_print_operand. */
 21662 arm_print_condition (FILE *stream
)
/* When the ccfsm is conditionalising instructions (states 3/4),
   emit the current condition code; Thumb-2 branch conversion is
   rejected with output_operand_lossage.  */
 21664 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
 21666 /* Branch conversion is not implemented for Thumb-2. */
 21669 output_operand_lossage ("predicated Thumb instruction");
 21672 if (current_insn_predicate
!= NULL
)
 21674 output_operand_lossage
 21675 ("predicated instruction in conditional sequence");
 21679 fputs (arm_condition_codes
[arm_current_cc
], stream
);
/* Otherwise, if the insn carries an explicit predicate, print the
   condition derived from it.  */
 21681 else if (current_insn_predicate
)
 21683 enum arm_cond_code code
;
 21687 output_operand_lossage ("predicated Thumb instruction");
 21691 code
= get_arm_condition_code (current_insn_predicate
);
 21692 fputs (arm_condition_codes
[code
], stream
);
/* NOTE(review): damaged extraction -- many interior lines (case labels,
   braces, whole statements) of arm_print_operand are missing here, so
   the per-code branches below appear without their `case` labels.  The
   visible text is preserved byte-for-byte; comments only.  */
 21697 /* Globally reserved letters: acln
 21698 Punctuation letters currently used: @_|?().!#
 21699 Lower case letters currently used: bcdefhimpqtvwxyz
 21700 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
 21701 Letters previously used, but now deprecated/obsolete: sVWXYZ.
 21703 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
 21705 If CODE is 'd', then the X is a condition operand and the instruction
 21706 should only be executed if the condition is true.
 21707 if CODE is 'D', then the X is a condition operand and the instruction
 21708 should only be executed if the condition is false: however, if the mode
 21709 of the comparison is CCFPEmode, then always execute the instruction -- we
 21710 do this because in these circumstances !GE does not necessarily imply LT;
 21711 in these cases the instruction pattern will take care to make sure that
 21712 an instruction containing %d will follow, thereby undoing the effects of
 21713 doing this instruction unconditionally.
 21714 If CODE is 'N' then X is a floating point operand that must be negated
 21716 If CODE is 'B' then output a bitwise inverted value of X (a const int).
 21717 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
 21719 arm_print_operand (FILE *stream
, rtx x
, int code
)
/* Punctuation codes: '@' comment start, '_' user label prefix,
   '|' register prefix, '?' current condition.  */
 21724 fputs (ASM_COMMENT_START
, stream
);
 21728 fputs (user_label_prefix
, stream
);
 21732 fputs (REGISTER_PREFIX
, stream
);
 21736 arm_print_condition (stream
);
 21740 /* The current condition code for a condition code setting instruction.
 21741 Preceded by 's' in unified syntax, otherwise followed by 's'. */
 21742 fputc('s', stream
);
 21743 arm_print_condition (stream
);
 21747 /* If the instruction is conditionally executed then print
 21748 the current condition code, otherwise print 's'. */
 21749 gcc_assert (TARGET_THUMB2
);
 21750 if (current_insn_predicate
)
 21751 arm_print_condition (stream
);
 21753 fputc('s', stream
);
 21756 /* %# is a "break" sequence. It doesn't output anything, but is used to
 21757 separate e.g. operand numbers from following text, if that text consists
 21758 of further digits which we don't want to be part of the operand
/* 'N': print the negated FP constant (case label lost in extraction).  */
 21766 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
 21767 fprintf (stream
, "%s", fp_const_from_val (&r
));
 21771 /* An integer or symbol address without a preceding # sign. */
 21773 switch (GET_CODE (x
))
 21776 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
 21780 output_addr_const (stream
, x
);
 21784 if (GET_CODE (XEXP (x
, 0)) == PLUS
 21785 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
 21787 output_addr_const (stream
, x
);
 21790 /* Fall through. */
 21793 output_operand_lossage ("Unsupported operand for code '%c'", code
);
 21797 /* An integer that we want to print in HEX. */
 21799 switch (GET_CODE (x
))
 21802 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
 21806 output_operand_lossage ("Unsupported operand for code '%c'", code
);
/* 'B': bitwise-inverted constant, sign-extended to HOST_WIDE_INT.  */
 21811 if (CONST_INT_P (x
))
 21814 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
 21815 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
 21819 putc ('~', stream
);
 21820 output_addr_const (stream
, x
);
 21825 /* Print the log2 of a CONST_INT. */
 21829 if (!CONST_INT_P (x
)
 21830 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
 21831 output_operand_lossage ("Unsupported operand for code '%c'", code
);
 21833 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
 21838 /* The low 16 bits of an immediate constant. */
 21839 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
/* 'i'/'I' (presumably): arithmetic instruction mnemonic, shifted or not
   -- case labels lost; TODO confirm against upstream arm.c.  */
 21843 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
 21847 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
/* 'S': shift operation suffix (", lsl #n" etc.) for operand X.  */
 21855 shift
= shift_op (x
, &val
);
 21859 fprintf (stream
, ", %s ", shift
);
 21861 arm_print_operand (stream
, XEXP (x
, 1), 0);
 21863 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
 21868 /* An explanation of the 'Q', 'R' and 'H' register operands:
 21870 In a pair of registers containing a DI or DF value the 'Q'
 21871 operand returns the register number of the register containing
 21872 the least significant part of the value. The 'R' operand returns
 21873 the register number of the register containing the most
 21874 significant part of the value.
 21876 The 'H' operand returns the higher of the two register numbers.
 21877 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
 21878 same as the 'Q' operand, since the most significant part of the
 21879 value is held in the lower number register. The reverse is true
 21880 on systems where WORDS_BIG_ENDIAN is false.
 21882 The purpose of these operands is to distinguish between cases
 21883 where the endian-ness of the values is important (for example
 21884 when they are added together), and cases where the endian-ness
 21885 is irrelevant, but the order of register operations is important.
 21886 For example when loading a value from memory into a register
 21887 pair, the endian-ness does not matter. Provided that the value
 21888 from the lower memory address is put into the lower numbered
 21889 register, and the value from the higher address is put into the
 21890 higher numbered register, the load will work regardless of whether
 21891 the value being loaded is big-wordian or little-wordian. The
 21892 order of the two register loads can matter however, if the address
 21893 of the memory location is actually held in one of the registers
 21894 being overwritten by the load.
 21896 The 'Q' and 'R' constraints are also available for 64-bit
/* 'Q': low part of a register pair (or low word of a constant).  */
 21899 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
 21901 rtx part
= gen_lowpart (SImode
, x
);
 21902 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
 21906 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
 21908 output_operand_lossage ("invalid operand for code '%c'", code
);
 21912 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
/* 'R': high part of a register pair (or high word of a constant).  */
 21916 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
 21918 machine_mode mode
= GET_MODE (x
);
 21921 if (mode
== VOIDmode
)
 21923 part
= gen_highpart_mode (SImode
, mode
, x
);
 21924 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
 21928 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
 21930 output_operand_lossage ("invalid operand for code '%c'", code
);
 21934 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
/* 'H': higher-numbered register of the pair.  */
 21938 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
 21940 output_operand_lossage ("invalid operand for code '%c'", code
);
 21944 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
/* 'J'/'K' (presumably): third/fourth word registers of a quad value --
   case labels lost; TODO confirm.  */
 21948 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
 21950 output_operand_lossage ("invalid operand for code '%c'", code
);
 21954 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
 21958 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
 21960 output_operand_lossage ("invalid operand for code '%c'", code
);
 21964 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
/* 'm': base register of a memory operand.  */
 21968 asm_fprintf (stream
, "%r",
 21969 REG_P (XEXP (x
, 0))
 21970 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
/* 'M': ldm/stm style register range.  */
 21974 asm_fprintf (stream
, "{%r-%r}",
 21976 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
 21979 /* Like 'M', but writing doubleword vector registers, for use by Neon
 21983 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
 21984 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
 21986 asm_fprintf (stream
, "{d%d}", regno
);
 21988 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
/* 'd': condition code of a comparison (always -> print nothing).  */
 21993 /* CONST_TRUE_RTX means always -- that's the default. */
 21994 if (x
== const_true_rtx
)
 21997 if (!COMPARISON_P (x
))
 21999 output_operand_lossage ("invalid operand for code '%c'", code
);
 22003 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
/* 'D': inverse condition code of a comparison.  */
 22008 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
 22009 want to do that. */
 22010 if (x
== const_true_rtx
)
 22012 output_operand_lossage ("instruction never executed");
 22015 if (!COMPARISON_P (x
))
 22017 output_operand_lossage ("invalid operand for code '%c'", code
);
 22021 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
 22022 (get_arm_condition_code (x
))],
 22032 /* Former Maverick support, removed after GCC-4.7. */
 22033 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
/* iWMMXt wCG general register: validate range, print index.  */
 22038 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
 22039 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
 22040 /* Bad value for wCG register number. */
 22042 output_operand_lossage ("invalid operand for code '%c'", code
);
 22047 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
 22050 /* Print an iWMMXt control register name. */
 22052 if (!CONST_INT_P (x
)
 22054 || INTVAL (x
) >= 16)
 22055 /* Bad value for wC register number. */
 22057 output_operand_lossage ("invalid operand for code '%c'", code
);
 22063 static const char * wc_reg_names
[16] =
 22065 "wCID", "wCon", "wCSSF", "wCASF",
 22066 "wC4", "wC5", "wC6", "wC7",
 22067 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
 22068 "wC12", "wC13", "wC14", "wC15"
 22071 fputs (wc_reg_names
[INTVAL (x
)], stream
);
 22075 /* Print the high single-precision register of a VFP double-precision
 22079 machine_mode mode
= GET_MODE (x
);
 22082 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
 22084 output_operand_lossage ("invalid operand for code '%c'", code
);
 22089 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
 22091 output_operand_lossage ("invalid operand for code '%c'", code
);
 22095 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
 22099 /* Print a VFP/Neon double precision or quad precision register name. */
 22103 machine_mode mode
= GET_MODE (x
);
 22104 int is_quad
= (code
== 'q');
 22107 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
 22109 output_operand_lossage ("invalid operand for code '%c'", code
);
 22114 || !IS_VFP_REGNUM (REGNO (x
)))
 22116 output_operand_lossage ("invalid operand for code '%c'", code
);
 22121 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
 22122 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
 22124 output_operand_lossage ("invalid operand for code '%c'", code
);
 22128 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
 22129 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
 22133 /* These two codes print the low/high doubleword register of a Neon quad
 22134 register, respectively. For pair-structure types, can also print
 22135 low/high quadword registers. */
 22139 machine_mode mode
= GET_MODE (x
);
 22142 if ((GET_MODE_SIZE (mode
) != 16
 22143 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
 22145 output_operand_lossage ("invalid operand for code '%c'", code
);
 22150 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
 22152 output_operand_lossage ("invalid operand for code '%c'", code
);
 22156 if (GET_MODE_SIZE (mode
) == 16)
 22157 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
 22158 + (code
== 'f' ? 1 : 0));
 22160 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
 22161 + (code
== 'f' ? 1 : 0));
 22165 /* Print a VFPv3 floating-point constant, represented as an integer
 22169 int index
= vfp3_const_double_index (x
);
 22170 gcc_assert (index
!= -1);
 22171 fprintf (stream
, "%d", index
);
 22175 /* Print bits representing opcode features for Neon.
 22177 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
 22178 and polynomials as unsigned.
 22180 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
 22182 Bit 2 is 1 for rounding functions, 0 otherwise. */
 22184 /* Identify the type as 's', 'u', 'p' or 'f'. */
 22187 HOST_WIDE_INT bits
= INTVAL (x
);
 22188 fputc ("uspf"[bits
& 3], stream
);
 22192 /* Likewise, but signed and unsigned integers are both 'i'. */
 22195 HOST_WIDE_INT bits
= INTVAL (x
);
 22196 fputc ("iipf"[bits
& 3], stream
);
 22200 /* As for 'T', but emit 'u' instead of 'p'. */
 22203 HOST_WIDE_INT bits
= INTVAL (x
);
 22204 fputc ("usuf"[bits
& 3], stream
);
 22208 /* Bit 2: rounding (vs none). */
 22211 HOST_WIDE_INT bits
= INTVAL (x
);
 22212 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
 22216 /* Memory operand for vld1/vst1 instruction. */
 22220 bool postinc
= FALSE
;
 22221 rtx postinc_reg
= NULL
;
 22222 unsigned align
, memsize
, align_bits
;
 22224 gcc_assert (MEM_P (x
));
 22225 addr
= XEXP (x
, 0);
 22226 if (GET_CODE (addr
) == POST_INC
)
 22229 addr
= XEXP (addr
, 0);
 22231 if (GET_CODE (addr
) == POST_MODIFY
)
 22233 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
 22234 addr
= XEXP (addr
, 0);
 22236 asm_fprintf (stream
, "[%r", REGNO (addr
));
 22238 /* We know the alignment of this access, so we can emit a hint in the
 22239 instruction (for some alignments) as an aid to the memory subsystem
 22241 align
= MEM_ALIGN (x
) >> 3;
 22242 memsize
= MEM_SIZE (x
);
 22244 /* Only certain alignment specifiers are supported by the hardware. */
 22245 if (memsize
== 32 && (align
% 32) == 0)
 22247 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
 22249 else if (memsize
>= 8 && (align
% 8) == 0)
 22254 if (align_bits
!= 0)
 22255 asm_fprintf (stream
, ":%d", align_bits
);
 22257 asm_fprintf (stream
, "]");
 22260 fputs("!", stream
);
 22262 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
/* Plain register-indirect memory operand: print "[rN]".  */
 22270 gcc_assert (MEM_P (x
));
 22271 addr
= XEXP (x
, 0);
 22272 gcc_assert (REG_P (addr
));
 22273 asm_fprintf (stream
, "[%r]", REGNO (addr
));
 22277 /* Translate an S register number into a D register number and element index. */
 22280 machine_mode mode
= GET_MODE (x
);
 22283 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
 22285 output_operand_lossage ("invalid operand for code '%c'", code
);
 22290 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
 22292 output_operand_lossage ("invalid operand for code '%c'", code
);
 22296 regno
= regno
- FIRST_VFP_REGNUM
;
 22297 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
/* 'v' (presumably): fractional-bits encoding of a VFP3 constant --
   case label lost; TODO confirm.  */
 22302 gcc_assert (CONST_DOUBLE_P (x
));
 22304 result
= vfp3_const_double_for_fract_bits (x
);
 22306 result
= vfp3_const_double_for_bits (x
);
 22307 fprintf (stream
, "#%d", result
);
 22310 /* Register specifier for vld1.16/vst1.16. Translate the S register
 22311 number into a D register number and element index. */
 22314 machine_mode mode
= GET_MODE (x
);
 22317 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
 22319 output_operand_lossage ("invalid operand for code '%c'", code
);
 22324 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
 22326 output_operand_lossage ("invalid operand for code '%c'", code
);
 22330 regno
= regno
- FIRST_VFP_REGNUM
;
 22331 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
/* Default case: no code letter -- print the operand itself.  */
 22338 output_operand_lossage ("missing operand");
 22342 switch (GET_CODE (x
))
 22345 asm_fprintf (stream
, "%r", REGNO (x
));
 22349 output_address (GET_MODE (x
), XEXP (x
, 0));
 22355 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
 22356 sizeof (fpstr
), 0, 1);
 22357 fprintf (stream
, "#%s", fpstr
);
 22362 gcc_assert (GET_CODE (x
) != NEG
);
 22363 fputc ('#', stream
);
 22364 if (GET_CODE (x
) == HIGH
)
 22366 fputs (":lower16:", stream
);
 22370 output_addr_const (stream
, x
);
/* NOTE(review): damaged extraction -- the TARGET_32BIT/TARGET_THUMB split
   and several case labels of this function are missing.  Code preserved
   byte-for-byte; comments only.  */
 22376 /* Target hook for printing a memory address. */
 22378 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
 22382 int is_minus
= GET_CODE (x
) == MINUS
;
/* Simple register-indirect address.  */
 22385 asm_fprintf (stream
, "[%r]", REGNO (x
));
/* Base +/- offset or base +/- index-register addressing.  */
 22386 else if (GET_CODE (x
) == PLUS
|| is_minus
)
 22388 rtx base
= XEXP (x
, 0);
 22389 rtx index
= XEXP (x
, 1);
 22390 HOST_WIDE_INT offset
= 0;
 22392 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
 22394 /* Ensure that BASE is a register. */
 22395 /* (one of them must be). */
 22396 /* Also ensure the SP is not used as in index register. */
 22397 std::swap (base
, index
);
 22399 switch (GET_CODE (index
))
 22402 offset
= INTVAL (index
);
 22405 asm_fprintf (stream
, "[%r, #%wd]",
 22406 REGNO (base
), offset
);
 22410 asm_fprintf (stream
, "[%r, %s%r]",
 22411 REGNO (base
), is_minus
? "-" : "",
/* Shifted-index form: "[rB, -rI, lsl #n]" etc.; the shift suffix is
   printed via arm_print_operand code 'S'.  */
 22421 asm_fprintf (stream
, "[%r, %s%r",
 22422 REGNO (base
), is_minus
? "-" : "",
 22423 REGNO (XEXP (index
, 0)));
 22424 arm_print_operand (stream
, index
, 'S');
 22425 fputs ("]", stream
);
 22430 gcc_unreachable ();
/* Pre/post increment/decrement addressing.  */
 22433 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
 22434 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
 22436 gcc_assert (REG_P (XEXP (x
, 0)));
 22438 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
 22439 asm_fprintf (stream
, "[%r, #%s%d]!",
 22440 REGNO (XEXP (x
, 0)),
 22441 GET_CODE (x
) == PRE_DEC
? "-" : "",
 22442 GET_MODE_SIZE (mode
));
 22444 asm_fprintf (stream
, "[%r], #%s%d",
 22445 REGNO (XEXP (x
, 0)),
 22446 GET_CODE (x
) == POST_DEC
? "-" : "",
 22447 GET_MODE_SIZE (mode
));
 22449 else if (GET_CODE (x
) == PRE_MODIFY
)
 22451 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
 22452 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
 22453 asm_fprintf (stream
, "#%wd]!",
 22454 INTVAL (XEXP (XEXP (x
, 1), 1)));
 22456 asm_fprintf (stream
, "%r]!",
 22457 REGNO (XEXP (XEXP (x
, 1), 1)));
 22459 else if (GET_CODE (x
) == POST_MODIFY
)
 22461 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
 22462 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
 22463 asm_fprintf (stream
, "#%wd",
 22464 INTVAL (XEXP (XEXP (x
, 1), 1)));
 22466 asm_fprintf (stream
, "%r",
 22467 REGNO (XEXP (XEXP (x
, 1), 1)));
 22469 else output_addr_const (stream
, x
);
/* Thumb branch of the hook (presumably -- the enclosing else for
   !TARGET_32BIT was dropped by the extraction; TODO confirm).  */
 22474 asm_fprintf (stream
, "[%r]", REGNO (x
));
 22475 else if (GET_CODE (x
) == POST_INC
)
 22476 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
 22477 else if (GET_CODE (x
) == PLUS
)
 22479 gcc_assert (REG_P (XEXP (x
, 0)));
 22480 if (CONST_INT_P (XEXP (x
, 1)))
 22481 asm_fprintf (stream
, "[%r, #%wd]",
 22482 REGNO (XEXP (x
, 0)),
 22483 INTVAL (XEXP (x
, 1)));
 22485 asm_fprintf (stream
, "[%r, %r]",
 22486 REGNO (XEXP (x
, 0)),
 22487 REGNO (XEXP (x
, 1)));
 22490 output_addr_const (stream
, x
);
 22494 /* Target hook for indicating whether a punctuation character for
 22495 TARGET_PRINT_OPERAND is valid. */
/* Returns true for the punctuation codes arm_print_operand handles;
   '?', '!' and '_' are additionally gated on the target ISA.  */
 22497 arm_print_operand_punct_valid_p (unsigned char code
)
 22499 return (code
== '@' || code
== '|' || code
== '.'
 22500 || code
== '(' || code
== ')' || code
== '#'
 22501 || (TARGET_32BIT
&& (code
== '?'))
 22502 || (TARGET_THUMB2
&& (code
== '!'))
 22503 || (TARGET_THUMB
&& (code
== '_')));
/* NOTE(review): damaged extraction -- several lines of this function
   (including the emitted assemble_* call names around original lines
   22557/22564) are missing.  Code preserved byte-for-byte.  */
 22506 /* Target hook for assembling integer objects. The ARM version needs to
 22507 handle word-sized values specially. */
 22509 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
/* Aligned word-sized values: emit ".word" directly, adding (GOT)/
   (GOTOFF) relocations for symbols when generating PIC constant
   tables.  */
 22513 if (size
== UNITS_PER_WORD
&& aligned_p
)
 22515 fputs ("\t.word\t", asm_out_file
);
 22516 output_addr_const (asm_out_file
, x
);
 22518 /* Mark symbols as position independent. We only do this in the
 22519 .text segment, not in the .data segment. */
 22520 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
 22521 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
 22523 /* See legitimize_pic_address for an explanation of the
 22524 TARGET_VXWORKS_RTP check. */
 22525 /* References to weak symbols cannot be resolved locally:
 22526 they may be overridden by a non-weak definition at link
 22528 if (!arm_pic_data_is_text_relative
 22529 || (GET_CODE (x
) == SYMBOL_REF
 22530 && (!SYMBOL_REF_LOCAL_P (x
)
 22531 || (SYMBOL_REF_DECL (x
)
 22532 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0))))
 22533 fputs ("(GOT)", asm_out_file
);
 22535 fputs ("(GOTOFF)", asm_out_file
);
 22537 fputc ('\n', asm_out_file
);
/* Vector constants: emit each element separately, with full alignment
   only on the first element.  */
 22541 mode
= GET_MODE (x
);
 22543 if (arm_vector_mode_supported_p (mode
))
 22547 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
 22549 units
= CONST_VECTOR_NUNITS (x
);
 22550 size
= GET_MODE_UNIT_SIZE (mode
);
 22552 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
 22553 for (i
= 0; i
< units
; i
++)
 22555 rtx elt
= CONST_VECTOR_ELT (x
, i
);
 22557 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
 22560 for (i
= 0; i
< units
; i
++)
 22562 rtx elt
= CONST_VECTOR_ELT (x
, i
);
 22564 (*CONST_DOUBLE_REAL_VALUE (elt
), GET_MODE_INNER (mode
),
 22565 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
/* Everything else falls back to the generic implementation.  */
 22571 return default_assemble_integer (x
, size
, aligned_p
);
/* Emit a static constructor/destructor entry for SYMBOL at PRIORITY.
   On non-AAPCS targets defer to the default named-section handlers;
   on AAPCS targets emit a ".word symbol(target1)" into .init_array /
   .fini_array (priority-suffixed section when PRIORITY is not the
   default).  NOTE(review): several interior lines were lost in
   extraction; code preserved byte-for-byte.  */
 22575 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
 22579 if (!TARGET_AAPCS_BASED
)
 22582 default_named_section_asm_out_constructor
 22583 : default_named_section_asm_out_destructor
) (symbol
, priority
);
 22587 /* Put these in the .init_array section, using a special relocation. */
 22588 if (priority
!= DEFAULT_INIT_PRIORITY
)
 22591 sprintf (buf
, "%s.%.5u",
 22592 is_ctor
? ".init_array" : ".fini_array",
 22594 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
 22601 switch_to_section (s
);
 22602 assemble_align (POINTER_SIZE
);
 22603 fputs ("\t.word\t", asm_out_file
);
 22604 output_addr_const (asm_out_file
, symbol
);
/* "(target1)" requests the R_ARM_TARGET1 relocation.  */
 22605 fputs ("(target1)\n", asm_out_file
);
 22608 /* Add a function to the list of static constructors. */
 22611 arm_elf_asm_constructor (rtx symbol
, int priority
)
/* Thin wrapper over the shared ctor/dtor emitter.  */
 22613 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
 22616 /* Add a function to the list of static destructors. */
 22619 arm_elf_asm_destructor (rtx symbol
, int priority
)
/* Thin wrapper over the shared ctor/dtor emitter.  */
 22621 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22624 /* A finite state machine takes care of noticing whether or not instructions
22625 can be conditionally executed, and thus decrease execution time and code
22626 size by deleting branch instructions. The fsm is controlled by
22627 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22629 /* The state of the fsm controlling condition codes are:
22630 0: normal, do nothing special
22631 1: make ASM_OUTPUT_OPCODE not output this instruction
22632 2: make ASM_OUTPUT_OPCODE not output this instruction
22633 3: make instructions conditional
22634 4: make instructions conditional
22636 State transitions (state->state by whom under condition):
22637 0 -> 1 final_prescan_insn if the `target' is a label
22638 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22639 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22640 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22641 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22642 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22643 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22644 (the target insn is arm_target_insn).
22646 If the jump clobbers the conditions then we use states 2 and 4.
22648 A similar thing can be done with conditional return insns.
22650 XXX In case the `target' is an unconditional branch, this conditionalising
22651 of the instructions always reduces code size, but not always execution
22652 time. But then, I want to reduce the code size to somewhere near what
22653 /bin/cc produces. */
22655 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22656 instructions. When a COND_EXEC instruction is seen the subsequent
22657 instructions are scanned so that multiple conditional instructions can be
22658 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22659 specify the length and true/false mask for the IT block. These will be
22660 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
/* NOTE(review): damaged extraction -- the switch heads and several
   `case CC_*mode:` labels separating the groups below were dropped, so
   the per-CC-mode case groups run together.  Code preserved
   byte-for-byte; comments mark the visible group boundaries.  */
 22662 /* Returns the index of the ARM condition code string in
 22663 `arm_condition_codes', or ARM_NV if the comparison is invalid.
 22664 COMPARISON should be an rtx like `(eq (...) (...))'. */
 22667 maybe_get_arm_condition_code (rtx comparison
)
 22669 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
 22670 enum arm_cond_code code
;
 22671 enum rtx_code comp_code
= GET_CODE (comparison
);
 22673 if (GET_MODE_CLASS (mode
) != MODE_CC
)
 22674 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
 22675 XEXP (comparison
, 1));
/* Dominance CC modes: record the dominating condition, then invert it
   for EQ comparisons at the `dominance' label.  */
 22679 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
 22680 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
 22681 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
 22682 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
 22683 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
 22684 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
 22685 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
 22686 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
 22687 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
 22688 case CC_DLTUmode
: code
= ARM_CC
;
 22691 if (comp_code
== EQ
)
 22692 return ARM_INVERSE_CONDITION_CODE (code
);
 22693 if (comp_code
== NE
)
/* CC_NOOVmode group (presumably -- label lost; TODO confirm).  */
 22700 case NE
: return ARM_NE
;
 22701 case EQ
: return ARM_EQ
;
 22702 case GE
: return ARM_PL
;
 22703 case LT
: return ARM_MI
;
 22704 default: return ARM_NV
;
/* CC_Zmode group.  */
 22710 case NE
: return ARM_NE
;
 22711 case EQ
: return ARM_EQ
;
 22712 default: return ARM_NV
;
/* CC_Nmode group: N flag tested directly.  */
 22718 case NE
: return ARM_MI
;
 22719 case EQ
: return ARM_PL
;
 22720 default: return ARM_NV
;
/* Floating-point CC modes.  */
 22725 /* We can handle all cases except UNEQ and LTGT. */
 22728 case GE
: return ARM_GE
;
 22729 case GT
: return ARM_GT
;
 22730 case LE
: return ARM_LS
;
 22731 case LT
: return ARM_MI
;
 22732 case NE
: return ARM_NE
;
 22733 case EQ
: return ARM_EQ
;
 22734 case ORDERED
: return ARM_VC
;
 22735 case UNORDERED
: return ARM_VS
;
 22736 case UNLT
: return ARM_LT
;
 22737 case UNLE
: return ARM_LE
;
 22738 case UNGT
: return ARM_HI
;
 22739 case UNGE
: return ARM_PL
;
 22740 /* UNEQ and LTGT do not have a representation. */
 22741 case UNEQ
: /* Fall through. */
 22742 case LTGT
: /* Fall through. */
 22743 default: return ARM_NV
;
/* Swapped-operand CC mode: conditions are mirrored.  */
 22749 case NE
: return ARM_NE
;
 22750 case EQ
: return ARM_EQ
;
 22751 case GE
: return ARM_LE
;
 22752 case GT
: return ARM_LT
;
 22753 case LE
: return ARM_GE
;
 22754 case LT
: return ARM_GT
;
 22755 case GEU
: return ARM_LS
;
 22756 case GTU
: return ARM_CC
;
 22757 case LEU
: return ARM_CS
;
 22758 case LTU
: return ARM_HI
;
 22759 default: return ARM_NV
;
/* Carry-flag CC mode.  */
 22765 case LTU
: return ARM_CS
;
 22766 case GEU
: return ARM_CC
;
 22767 case NE
: return ARM_CS
;
 22768 case EQ
: return ARM_CC
;
 22769 default: return ARM_NV
;
/* CC_Cmode-style unsigned group.  */
 22775 case NE
: return ARM_NE
;
 22776 case EQ
: return ARM_EQ
;
 22777 case GEU
: return ARM_CS
;
 22778 case GTU
: return ARM_HI
;
 22779 case LEU
: return ARM_LS
;
 22780 case LTU
: return ARM_CC
;
 22781 default: return ARM_NV
;
/* N+V-only CC mode.  */
 22787 case GE
: return ARM_GE
;
 22788 case LT
: return ARM_LT
;
 22789 case GEU
: return ARM_CS
;
 22790 case LTU
: return ARM_CC
;
 22791 default: return ARM_NV
;
/* Overflow-flag CC mode.  */
 22797 case NE
: return ARM_VS
;
 22798 case EQ
: return ARM_VC
;
 22799 default: return ARM_NV
;
/* Plain CCmode: full set of conditions.  */
 22805 case NE
: return ARM_NE
;
 22806 case EQ
: return ARM_EQ
;
 22807 case GE
: return ARM_GE
;
 22808 case GT
: return ARM_GT
;
 22809 case LE
: return ARM_LE
;
 22810 case LT
: return ARM_LT
;
 22811 case GEU
: return ARM_CS
;
 22812 case GTU
: return ARM_HI
;
 22813 case LEU
: return ARM_LS
;
 22814 case LTU
: return ARM_CC
;
 22815 default: return ARM_NV
;
 22818 default: gcc_unreachable ();
 22822 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
 22823 static enum arm_cond_code
 22824 get_arm_condition_code (rtx comparison
)
/* NOTE(review): the trailing `return code;` (original line 22828)
   was dropped by the extraction.  */
 22826 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
 22827 gcc_assert (code
!= ARM_NV
);
/* NOTE(review): damaged extraction -- loop heads, braces and several
   statements of this function are missing.  Code preserved
   byte-for-byte; comments only.  */
 22831 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
 22834 thumb2_final_prescan_insn (rtx_insn
*insn
)
 22836 rtx_insn
*first_insn
= insn
;
 22837 rtx body
= PATTERN (insn
);
 22839 enum arm_cond_code code
;
 22844 /* max_insns_skipped in the tune was already taken into account in the
 22845 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
 22846 just emit the IT blocks as we can. It does not make sense to split
 22848 max
= MAX_INSN_PER_IT_BLOCK
;
 22850 /* Remove the previous insn from the count of insns to be output. */
 22851 if (arm_condexec_count
)
 22852 arm_condexec_count
--;
 22854 /* Nothing to do if we are already inside a conditional block. */
 22855 if (arm_condexec_count
)
 22858 if (GET_CODE (body
) != COND_EXEC
)
 22861 /* Conditional jumps are implemented directly. */
/* Start a new IT block from this COND_EXEC insn: record its condition
   and seed mask/length from its ce_count attribute.  */
 22865 predicate
= COND_EXEC_TEST (body
);
 22866 arm_current_cc
= get_arm_condition_code (predicate
);
 22868 n
= get_attr_ce_count (insn
);
 22869 arm_condexec_count
= 1;
 22870 arm_condexec_mask
= (1 << n
) - 1;
 22871 arm_condexec_masklen
= n
;
 22872 /* See if subsequent instructions can be combined into the same block. */
 22875 insn
= next_nonnote_insn (insn
);
 22877 /* Jumping into the middle of an IT block is illegal, so a label or
 22878 barrier terminates the block. */
 22879 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
 22882 body
= PATTERN (insn
);
 22883 /* USE and CLOBBER aren't really insns, so just skip them. */
 22884 if (GET_CODE (body
) == USE
 22885 || GET_CODE (body
) == CLOBBER
)
 22888 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
 22889 if (GET_CODE (body
) != COND_EXEC
)
 22891 /* Maximum number of conditionally executed instructions in a block. */
 22892 n
= get_attr_ce_count (insn
);
 22893 if (arm_condexec_masklen
+ n
> max
)
/* Fold this insn into the current IT block when its condition matches
   (or is the exact inverse of) the block's condition.  */
 22896 predicate
= COND_EXEC_TEST (body
);
 22897 code
= get_arm_condition_code (predicate
);
 22898 mask
= (1 << n
) - 1;
 22899 if (arm_current_cc
== code
)
 22900 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
 22901 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
 22904 arm_condexec_count
++;
 22905 arm_condexec_masklen
+= n
;
 22907 /* A jump must be the last instruction in a conditional block. */
 22911 /* Restore recog_data (getting the attributes of other insns can
 22912 destroy this array, but final.c assumes that it remains intact
 22913 across this call). */
 22914 extract_constrain_insn_cached (first_insn
);
22918 arm_final_prescan_insn (rtx_insn
*insn
)
22920 /* BODY will hold the body of INSN. */
22921 rtx body
= PATTERN (insn
);
22923 /* This will be 1 if trying to repeat the trick, and things need to be
22924 reversed if it appears to fail. */
22927 /* If we start with a return insn, we only succeed if we find another one. */
22928 int seeking_return
= 0;
22929 enum rtx_code return_code
= UNKNOWN
;
22931 /* START_INSN will hold the insn from where we start looking. This is the
22932 first insn after the following code_label if REVERSE is true. */
22933 rtx_insn
*start_insn
= insn
;
22935 /* If in state 4, check if the target branch is reached, in order to
22936 change back to state 0. */
22937 if (arm_ccfsm_state
== 4)
22939 if (insn
== arm_target_insn
)
22941 arm_target_insn
= NULL
;
22942 arm_ccfsm_state
= 0;
22947 /* If in state 3, it is possible to repeat the trick, if this insn is an
22948 unconditional branch to a label, and immediately following this branch
22949 is the previous target label which is only used once, and the label this
22950 branch jumps to is not too far off. */
22951 if (arm_ccfsm_state
== 3)
22953 if (simplejump_p (insn
))
22955 start_insn
= next_nonnote_insn (start_insn
);
22956 if (BARRIER_P (start_insn
))
22958 /* XXX Isn't this always a barrier? */
22959 start_insn
= next_nonnote_insn (start_insn
);
22961 if (LABEL_P (start_insn
)
22962 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22963 && LABEL_NUSES (start_insn
) == 1)
22968 else if (ANY_RETURN_P (body
))
22970 start_insn
= next_nonnote_insn (start_insn
);
22971 if (BARRIER_P (start_insn
))
22972 start_insn
= next_nonnote_insn (start_insn
);
22973 if (LABEL_P (start_insn
)
22974 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22975 && LABEL_NUSES (start_insn
) == 1)
22978 seeking_return
= 1;
22979 return_code
= GET_CODE (body
);
22988 gcc_assert (!arm_ccfsm_state
|| reverse
);
22989 if (!JUMP_P (insn
))
22992 /* This jump might be paralleled with a clobber of the condition codes
22993 the jump should always come first */
22994 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22995 body
= XVECEXP (body
, 0, 0);
22998 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22999 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23002 int fail
= FALSE
, succeed
= FALSE
;
23003 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23004 int then_not_else
= TRUE
;
23005 rtx_insn
*this_insn
= start_insn
;
23008 /* Register the insn jumped to. */
23011 if (!seeking_return
)
23012 label
= XEXP (SET_SRC (body
), 0);
23014 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23015 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23016 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23018 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23019 then_not_else
= FALSE
;
23021 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23023 seeking_return
= 1;
23024 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23026 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23028 seeking_return
= 1;
23029 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23030 then_not_else
= FALSE
;
23033 gcc_unreachable ();
23035 /* See how many insns this branch skips, and what kind of insns. If all
23036 insns are okay, and the label or unconditional branch to the same
23037 label is not too far away, succeed. */
23038 for (insns_skipped
= 0;
23039 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23043 this_insn
= next_nonnote_insn (this_insn
);
23047 switch (GET_CODE (this_insn
))
23050 /* Succeed if it is the target label, otherwise fail since
23051 control falls in from somewhere else. */
23052 if (this_insn
== label
)
23054 arm_ccfsm_state
= 1;
23062 /* Succeed if the following insn is the target label.
23064 If return insns are used then the last insn in a function
23065 will be a barrier. */
23066 this_insn
= next_nonnote_insn (this_insn
);
23067 if (this_insn
&& this_insn
== label
)
23069 arm_ccfsm_state
= 1;
23077 /* The AAPCS says that conditional calls should not be
23078 used since they make interworking inefficient (the
23079 linker can't transform BL<cond> into BLX). That's
23080 only a problem if the machine has BLX. */
23087 /* Succeed if the following insn is the target label, or
23088 if the following two insns are a barrier and the
23090 this_insn
= next_nonnote_insn (this_insn
);
23091 if (this_insn
&& BARRIER_P (this_insn
))
23092 this_insn
= next_nonnote_insn (this_insn
);
23094 if (this_insn
&& this_insn
== label
23095 && insns_skipped
< max_insns_skipped
)
23097 arm_ccfsm_state
= 1;
23105 /* If this is an unconditional branch to the same label, succeed.
23106 If it is to another label, do nothing. If it is conditional,
23108 /* XXX Probably, the tests for SET and the PC are
23111 scanbody
= PATTERN (this_insn
);
23112 if (GET_CODE (scanbody
) == SET
23113 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23115 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23116 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23118 arm_ccfsm_state
= 2;
23121 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23124 /* Fail if a conditional return is undesirable (e.g. on a
23125 StrongARM), but still allow this if optimizing for size. */
23126 else if (GET_CODE (scanbody
) == return_code
23127 && !use_return_insn (TRUE
, NULL
)
23130 else if (GET_CODE (scanbody
) == return_code
)
23132 arm_ccfsm_state
= 2;
23135 else if (GET_CODE (scanbody
) == PARALLEL
)
23137 switch (get_attr_conds (this_insn
))
23147 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23152 /* Instructions using or affecting the condition codes make it
23154 scanbody
= PATTERN (this_insn
);
23155 if (!(GET_CODE (scanbody
) == SET
23156 || GET_CODE (scanbody
) == PARALLEL
)
23157 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23167 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23168 arm_target_label
= CODE_LABEL_NUMBER (label
);
23171 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23173 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23175 this_insn
= next_nonnote_insn (this_insn
);
23176 gcc_assert (!this_insn
23177 || (!BARRIER_P (this_insn
)
23178 && !LABEL_P (this_insn
)));
23182 /* Oh, dear! we ran off the end.. give up. */
23183 extract_constrain_insn_cached (insn
);
23184 arm_ccfsm_state
= 0;
23185 arm_target_insn
= NULL
;
23188 arm_target_insn
= this_insn
;
23191 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23194 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23196 if (reverse
|| then_not_else
)
23197 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23200 /* Restore recog_data (getting the attributes of other insns can
23201 destroy this array, but final.c assumes that it remains intact
23202 across this call. */
23203 extract_constrain_insn_cached (insn
);
23207 /* Output IT instructions. */
23209 thumb2_asm_output_opcode (FILE * stream
)
23214 if (arm_condexec_mask
)
23216 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23217 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23219 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23220 arm_condition_codes
[arm_current_cc
]);
23221 arm_condexec_mask
= 0;
23225 /* Returns true if REGNO is a valid register
23226 for holding a quantity of type MODE. */
23228 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23230 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23231 return (regno
== CC_REGNUM
23232 || (TARGET_HARD_FLOAT
23233 && regno
== VFPCC_REGNUM
));
23235 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23239 /* For the Thumb we only allow values bigger than SImode in
23240 registers 0 - 6, so that there is always a second low
23241 register available to hold the upper part of the value.
23242 We probably we ought to ensure that the register is the
23243 start of an even numbered register pair. */
23244 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23246 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
23248 if (mode
== SFmode
|| mode
== SImode
)
23249 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23251 if (mode
== DFmode
)
23252 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23254 if (mode
== HFmode
)
23255 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23257 /* VFP registers can hold HImode values. */
23258 if (mode
== HImode
)
23259 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23262 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23263 || (VALID_NEON_QREG_MODE (mode
)
23264 && NEON_REGNO_OK_FOR_QUAD (regno
))
23265 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23266 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23267 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23268 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23269 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23274 if (TARGET_REALLY_IWMMXT
)
23276 if (IS_IWMMXT_GR_REGNUM (regno
))
23277 return mode
== SImode
;
23279 if (IS_IWMMXT_REGNUM (regno
))
23280 return VALID_IWMMXT_REG_MODE (mode
);
23283 /* We allow almost any value to be stored in the general registers.
23284 Restrict doubleword quantities to even register pairs in ARM state
23285 so that we can use ldrd. Do not allow very large Neon structure
23286 opaque modes in general registers; they would use too many. */
23287 if (regno
<= LAST_ARM_REGNUM
)
23289 if (ARM_NUM_REGS (mode
) > 4)
23295 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23298 if (regno
== FRAME_POINTER_REGNUM
23299 || regno
== ARG_POINTER_REGNUM
)
23300 /* We only allow integers in the fake hard registers. */
23301 return GET_MODE_CLASS (mode
) == MODE_INT
;
23306 /* Implement MODES_TIEABLE_P. */
23309 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23311 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23314 /* We specifically want to allow elements of "structure" modes to
23315 be tieable to the structure. This more general condition allows
23316 other rarer situations too. */
23318 && (VALID_NEON_DREG_MODE (mode1
)
23319 || VALID_NEON_QREG_MODE (mode1
)
23320 || VALID_NEON_STRUCT_MODE (mode1
))
23321 && (VALID_NEON_DREG_MODE (mode2
)
23322 || VALID_NEON_QREG_MODE (mode2
)
23323 || VALID_NEON_STRUCT_MODE (mode2
)))
23329 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23330 not used in arm mode. */
23333 arm_regno_class (int regno
)
23335 if (regno
== PC_REGNUM
)
23340 if (regno
== STACK_POINTER_REGNUM
)
23342 if (regno
== CC_REGNUM
)
23349 if (TARGET_THUMB2
&& regno
< 8)
23352 if ( regno
<= LAST_ARM_REGNUM
23353 || regno
== FRAME_POINTER_REGNUM
23354 || regno
== ARG_POINTER_REGNUM
)
23355 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23357 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23358 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23360 if (IS_VFP_REGNUM (regno
))
23362 if (regno
<= D7_VFP_REGNUM
)
23363 return VFP_D0_D7_REGS
;
23364 else if (regno
<= LAST_LO_VFP_REGNUM
)
23365 return VFP_LO_REGS
;
23367 return VFP_HI_REGS
;
23370 if (IS_IWMMXT_REGNUM (regno
))
23371 return IWMMXT_REGS
;
23373 if (IS_IWMMXT_GR_REGNUM (regno
))
23374 return IWMMXT_GR_REGS
;
23379 /* Handle a special case when computing the offset
23380 of an argument from the frame pointer. */
23382 arm_debugger_arg_offset (int value
, rtx addr
)
23386 /* We are only interested if dbxout_parms() failed to compute the offset. */
23390 /* We can only cope with the case where the address is held in a register. */
23394 /* If we are using the frame pointer to point at the argument, then
23395 an offset of 0 is correct. */
23396 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23399 /* If we are using the stack pointer to point at the
23400 argument, then an offset of 0 is correct. */
23401 /* ??? Check this is consistent with thumb2 frame layout. */
23402 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23403 && REGNO (addr
) == SP_REGNUM
)
23406 /* Oh dear. The argument is pointed to by a register rather
23407 than being held in a register, or being stored at a known
23408 offset from the frame pointer. Since GDB only understands
23409 those two kinds of argument we must translate the address
23410 held in the register into an offset from the frame pointer.
23411 We do this by searching through the insns for the function
23412 looking to see where this register gets its value. If the
23413 register is initialized from the frame pointer plus an offset
23414 then we are in luck and we can continue, otherwise we give up.
23416 This code is exercised by producing debugging information
23417 for a function with arguments like this:
23419 double func (double a, double b, int c, double d) {return d;}
23421 Without this code the stab for parameter 'd' will be set to
23422 an offset of 0 from the frame pointer, rather than 8. */
23424 /* The if() statement says:
23426 If the insn is a normal instruction
23427 and if the insn is setting the value in a register
23428 and if the register being set is the register holding the address of the argument
23429 and if the address is computing by an addition
23430 that involves adding to a register
23431 which is the frame pointer
23436 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23438 if ( NONJUMP_INSN_P (insn
)
23439 && GET_CODE (PATTERN (insn
)) == SET
23440 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23441 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23442 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23443 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23444 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23447 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23456 warning (0, "unable to compute real location of stacked parameter");
23457 value
= 8; /* XXX magic hack */
23463 /* Implement TARGET_PROMOTED_TYPE. */
23466 arm_promoted_type (const_tree t
)
23468 if (SCALAR_FLOAT_TYPE_P (t
)
23469 && TYPE_PRECISION (t
) == 16
23470 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
23471 return float_type_node
;
23475 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23476 This simply adds HFmode as a supported mode; even though we don't
23477 implement arithmetic on this type directly, it's supported by
23478 optabs conversions, much the way the double-word arithmetic is
23479 special-cased in the default hook. */
23482 arm_scalar_mode_supported_p (machine_mode mode
)
23484 if (mode
== HFmode
)
23485 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23486 else if (ALL_FIXED_POINT_MODE_P (mode
))
23489 return default_scalar_mode_supported_p (mode
);
23492 /* Set the value of FLT_EVAL_METHOD.
23493 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23495 0: evaluate all operations and constants, whose semantic type has at
23496 most the range and precision of type float, to the range and
23497 precision of float; evaluate all other operations and constants to
23498 the range and precision of the semantic type;
23500 N, where _FloatN is a supported interchange floating type
23501 evaluate all operations and constants, whose semantic type has at
23502 most the range and precision of _FloatN type, to the range and
23503 precision of the _FloatN type; evaluate all other operations and
23504 constants to the range and precision of the semantic type;
23506 If we have the ARMv8.2-A extensions then we support _Float16 in native
23507 precision, so we should set this to 16. Otherwise, we support the type,
23508 but want to evaluate expressions in float precision, so set this to
23511 static enum flt_eval_method
23512 arm_excess_precision (enum excess_precision_type type
)
23516 case EXCESS_PRECISION_TYPE_FAST
:
23517 case EXCESS_PRECISION_TYPE_STANDARD
:
23518 /* We can calculate either in 16-bit range and precision or
23519 32-bit range and precision. Make that decision based on whether
23520 we have native support for the ARMv8.2-A 16-bit floating-point
23521 instructions or not. */
23522 return (TARGET_VFP_FP16INST
23523 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23524 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
23525 case EXCESS_PRECISION_TYPE_IMPLICIT
:
23526 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
23528 gcc_unreachable ();
23530 return FLT_EVAL_METHOD_UNPREDICTABLE
;
23534 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23535 _Float16 if we are using anything other than ieee format for 16-bit
23536 floating point. Otherwise, punt to the default implementation. */
23537 static machine_mode
23538 arm_floatn_mode (int n
, bool extended
)
23540 if (!extended
&& n
== 16)
23541 return arm_fp16_format
== ARM_FP16_FORMAT_IEEE
? HFmode
: VOIDmode
;
23543 return default_floatn_mode (n
, extended
);
23547 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23548 not to early-clobber SRC registers in the process.
23550 We assume that the operands described by SRC and DEST represent a
23551 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23552 number of components into which the copy has been decomposed. */
23554 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23558 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23559 || REGNO (operands
[0]) < REGNO (operands
[1]))
23561 for (i
= 0; i
< count
; i
++)
23563 operands
[2 * i
] = dest
[i
];
23564 operands
[2 * i
+ 1] = src
[i
];
23569 for (i
= 0; i
< count
; i
++)
23571 operands
[2 * i
] = dest
[count
- i
- 1];
23572 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23577 /* Split operands into moves from op[1] + op[2] into op[0]. */
23580 neon_split_vcombine (rtx operands
[3])
23582 unsigned int dest
= REGNO (operands
[0]);
23583 unsigned int src1
= REGNO (operands
[1]);
23584 unsigned int src2
= REGNO (operands
[2]);
23585 machine_mode halfmode
= GET_MODE (operands
[1]);
23586 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
23587 rtx destlo
, desthi
;
23589 if (src1
== dest
&& src2
== dest
+ halfregs
)
23591 /* No-op move. Can't split to nothing; emit something. */
23592 emit_note (NOTE_INSN_DELETED
);
23596 /* Preserve register attributes for variable tracking. */
23597 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23598 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23599 GET_MODE_SIZE (halfmode
));
23601 /* Special case of reversed high/low parts. Use VSWP. */
23602 if (src2
== dest
&& src1
== dest
+ halfregs
)
23604 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23605 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23606 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23610 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23612 /* Try to avoid unnecessary moves if part of the result
23613 is in the right place already. */
23615 emit_move_insn (destlo
, operands
[1]);
23616 if (src2
!= dest
+ halfregs
)
23617 emit_move_insn (desthi
, operands
[2]);
23621 if (src2
!= dest
+ halfregs
)
23622 emit_move_insn (desthi
, operands
[2]);
23624 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
23637 /* Like emit_multi_reg_push, but allowing for a different set of
23638 registers to be described as saved. MASK is the set of registers
23639 to be saved; REAL_REGS is the set of registers to be described as
23640 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23643 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23645 unsigned long regno
;
23646 rtx par
[10], tmp
, reg
;
23650 /* Build the parallel of the registers actually being stored. */
23651 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23653 regno
= ctz_hwi (mask
);
23654 reg
= gen_rtx_REG (SImode
, regno
);
23657 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23659 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23664 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23665 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23666 tmp
= gen_frame_mem (BLKmode
, tmp
);
23667 tmp
= gen_rtx_SET (tmp
, par
[0]);
23670 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23671 insn
= emit_insn (tmp
);
23673 /* Always build the stack adjustment note for unwind info. */
23674 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23675 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23678 /* Build the parallel of the registers recorded as saved for unwind. */
23679 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23681 regno
= ctz_hwi (real_regs
);
23682 reg
= gen_rtx_REG (SImode
, regno
);
23684 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23685 tmp
= gen_frame_mem (SImode
, tmp
);
23686 tmp
= gen_rtx_SET (tmp
, reg
);
23687 RTX_FRAME_RELATED_P (tmp
) = 1;
23695 RTX_FRAME_RELATED_P (par
[0]) = 1;
23696 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23699 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23704 /* Emit code to push or pop registers to or from the stack. F is the
23705 assembly file. MASK is the registers to pop. */
23707 thumb_pop (FILE *f
, unsigned long mask
)
23710 int lo_mask
= mask
& 0xFF;
23711 int pushed_words
= 0;
23715 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23717 /* Special case. Do not generate a POP PC statement here, do it in
23719 thumb_exit (f
, -1);
23723 fprintf (f
, "\tpop\t{");
23725 /* Look at the low registers first. */
23726 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23730 asm_fprintf (f
, "%r", regno
);
23732 if ((lo_mask
& ~1) != 0)
23739 if (mask
& (1 << PC_REGNUM
))
23741 /* Catch popping the PC. */
23742 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
23743 || IS_CMSE_ENTRY (arm_current_func_type ()))
23745 /* The PC is never poped directly, instead
23746 it is popped into r3 and then BX is used. */
23747 fprintf (f
, "}\n");
23749 thumb_exit (f
, -1);
23758 asm_fprintf (f
, "%r", PC_REGNUM
);
23762 fprintf (f
, "}\n");
23765 /* Generate code to return from a thumb function.
23766 If 'reg_containing_return_addr' is -1, then the return address is
23767 actually on the stack, at the stack pointer. */
23769 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23771 unsigned regs_available_for_popping
;
23772 unsigned regs_to_pop
;
23774 unsigned available
;
23778 int restore_a4
= FALSE
;
23780 /* Compute the registers we need to pop. */
23784 if (reg_containing_return_addr
== -1)
23786 regs_to_pop
|= 1 << LR_REGNUM
;
23790 if (TARGET_BACKTRACE
)
23792 /* Restore the (ARM) frame pointer and stack pointer. */
23793 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23797 /* If there is nothing to pop then just emit the BX instruction and
23799 if (pops_needed
== 0)
23801 if (crtl
->calls_eh_return
)
23802 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23804 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23806 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
23807 reg_containing_return_addr
);
23808 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
23811 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23814 /* Otherwise if we are not supporting interworking and we have not created
23815 a backtrace structure and the function was not entered in ARM mode then
23816 just pop the return address straight into the PC. */
23817 else if (!TARGET_INTERWORK
23818 && !TARGET_BACKTRACE
23819 && !is_called_in_ARM_mode (current_function_decl
)
23820 && !crtl
->calls_eh_return
23821 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23823 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23827 /* Find out how many of the (return) argument registers we can corrupt. */
23828 regs_available_for_popping
= 0;
23830 /* If returning via __builtin_eh_return, the bottom three registers
23831 all contain information needed for the return. */
23832 if (crtl
->calls_eh_return
)
23836 /* If we can deduce the registers used from the function's
23837 return value. This is more reliable that examining
23838 df_regs_ever_live_p () because that will be set if the register is
23839 ever used in the function, not just if the register is used
23840 to hold a return value. */
23842 if (crtl
->return_rtx
!= 0)
23843 mode
= GET_MODE (crtl
->return_rtx
);
23845 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23847 size
= GET_MODE_SIZE (mode
);
23851 /* In a void function we can use any argument register.
23852 In a function that returns a structure on the stack
23853 we can use the second and third argument registers. */
23854 if (mode
== VOIDmode
)
23855 regs_available_for_popping
=
23856 (1 << ARG_REGISTER (1))
23857 | (1 << ARG_REGISTER (2))
23858 | (1 << ARG_REGISTER (3));
23860 regs_available_for_popping
=
23861 (1 << ARG_REGISTER (2))
23862 | (1 << ARG_REGISTER (3));
23864 else if (size
<= 4)
23865 regs_available_for_popping
=
23866 (1 << ARG_REGISTER (2))
23867 | (1 << ARG_REGISTER (3));
23868 else if (size
<= 8)
23869 regs_available_for_popping
=
23870 (1 << ARG_REGISTER (3));
23873 /* Match registers to be popped with registers into which we pop them. */
23874 for (available
= regs_available_for_popping
,
23875 required
= regs_to_pop
;
23876 required
!= 0 && available
!= 0;
23877 available
&= ~(available
& - available
),
23878 required
&= ~(required
& - required
))
23881 /* If we have any popping registers left over, remove them. */
23883 regs_available_for_popping
&= ~available
;
23885 /* Otherwise if we need another popping register we can use
23886 the fourth argument register. */
23887 else if (pops_needed
)
23889 /* If we have not found any free argument registers and
23890 reg a4 contains the return address, we must move it. */
23891 if (regs_available_for_popping
== 0
23892 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
23894 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23895 reg_containing_return_addr
= LR_REGNUM
;
23897 else if (size
> 12)
23899 /* Register a4 is being used to hold part of the return value,
23900 but we have dire need of a free, low register. */
23903 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
23906 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
23908 /* The fourth argument register is available. */
23909 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
23915 /* Pop as many registers as we can. */
23916 thumb_pop (f
, regs_available_for_popping
);
23918 /* Process the registers we popped. */
23919 if (reg_containing_return_addr
== -1)
23921 /* The return address was popped into the lowest numbered register. */
23922 regs_to_pop
&= ~(1 << LR_REGNUM
);
23924 reg_containing_return_addr
=
23925 number_of_first_bit_set (regs_available_for_popping
);
23927 /* Remove this register for the mask of available registers, so that
23928 the return address will not be corrupted by further pops. */
23929 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
23932 /* If we popped other registers then handle them here. */
23933 if (regs_available_for_popping
)
23937 /* Work out which register currently contains the frame pointer. */
23938 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
23940 /* Move it into the correct place. */
23941 asm_fprintf (f
, "\tmov\t%r, %r\n",
23942 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
23944 /* (Temporarily) remove it from the mask of popped registers. */
23945 regs_available_for_popping
&= ~(1 << frame_pointer
);
23946 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
23948 if (regs_available_for_popping
)
23952 /* We popped the stack pointer as well,
23953 find the register that contains it. */
23954 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
23956 /* Move it into the stack register. */
23957 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
23959 /* At this point we have popped all necessary registers, so
23960 do not worry about restoring regs_available_for_popping
23961 to its correct value:
23963 assert (pops_needed == 0)
23964 assert (regs_available_for_popping == (1 << frame_pointer))
23965 assert (regs_to_pop == (1 << STACK_POINTER)) */
23969 /* Since we have just move the popped value into the frame
23970 pointer, the popping register is available for reuse, and
23971 we know that we still have the stack pointer left to pop. */
23972 regs_available_for_popping
|= (1 << frame_pointer
);
23976 /* If we still have registers left on the stack, but we no longer have
23977 any registers into which we can pop them, then we must move the return
23978 address into the link register and make available the register that
23980 if (regs_available_for_popping
== 0 && pops_needed
> 0)
23982 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
23984 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
23985 reg_containing_return_addr
);
23987 reg_containing_return_addr
= LR_REGNUM
;
23990 /* If we have registers left on the stack then pop some more.
23991 We know that at most we will want to pop FP and SP. */
23992 if (pops_needed
> 0)
23997 thumb_pop (f
, regs_available_for_popping
);
23999 /* We have popped either FP or SP.
24000 Move whichever one it is into the correct register. */
24001 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24002 move_to
= number_of_first_bit_set (regs_to_pop
);
24004 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24006 regs_to_pop
&= ~(1 << move_to
);
24011 /* If we still have not popped everything then we must have only
24012 had one register available to us and we are now popping the SP. */
24013 if (pops_needed
> 0)
24017 thumb_pop (f
, regs_available_for_popping
);
24019 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24021 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24023 assert (regs_to_pop == (1 << STACK_POINTER))
24024 assert (pops_needed == 1)
24028 /* If necessary restore the a4 register. */
24031 if (reg_containing_return_addr
!= LR_REGNUM
)
24033 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24034 reg_containing_return_addr
= LR_REGNUM
;
24037 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24040 if (crtl
->calls_eh_return
)
24041 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24043 /* Return to caller. */
24044 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24046 /* This is for the cases where LR is not being used to contain the return
24047 address. It may therefore contain information that we might not want
24048 to leak, hence it must be cleared. The value in R0 will never be a
24049 secret at this point, so it is safe to use it, see the clearing code
24050 in 'cmse_nonsecure_entry_clear_before_return'. */
24051 if (reg_containing_return_addr
!= LR_REGNUM
)
24052 asm_fprintf (f
, "\tmov\tlr, r0\n");
24054 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
24055 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24058 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24061 /* Scan INSN just before assembler is output for it.
24062 For Thumb-1, we track the status of the condition codes; this
24063 information is used in the cbranchsi4_insn pattern. */
24065 thumb1_final_prescan_insn (rtx_insn
*insn
)
24067 if (flag_print_asm_name
)
24068 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24069 INSN_ADDRESSES (INSN_UID (insn
)));
24070 /* Don't overwrite the previous setter when we get to a cbranch. */
24071 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24073 enum attr_conds conds
;
24075 if (cfun
->machine
->thumb1_cc_insn
)
24077 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24078 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24081 conds
= get_attr_conds (insn
);
24082 if (conds
== CONDS_SET
)
24084 rtx set
= single_set (insn
);
24085 cfun
->machine
->thumb1_cc_insn
= insn
;
24086 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24087 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24088 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24089 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24091 rtx src1
= XEXP (SET_SRC (set
), 1);
24092 if (src1
== const0_rtx
)
24093 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24095 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24097 /* Record the src register operand instead of dest because
24098 cprop_hardreg pass propagates src. */
24099 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24102 else if (conds
!= CONDS_NOCOND
)
24103 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24106 /* Check if unexpected far jump is used. */
24107 if (cfun
->machine
->lr_save_eliminated
24108 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24109 internal_error("Unexpected thumb1 far jump");
24113 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24115 unsigned HOST_WIDE_INT mask
= 0xff;
24118 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24119 if (val
== 0) /* XXX */
24122 for (i
= 0; i
< 25; i
++)
24123 if ((val
& (mask
<< i
)) == val
)
24129 /* Returns nonzero if the current function contains,
24130 or might contain a far jump. */
24132 thumb_far_jump_used_p (void)
24135 bool far_jump
= false;
24136 unsigned int func_size
= 0;
24138 /* If we have already decided that far jumps may be used,
24139 do not bother checking again, and always return true even if
24140 it turns out that they are not being used. Once we have made
24141 the decision that far jumps are present (and that hence the link
24142 register will be pushed onto the stack) we cannot go back on it. */
24143 if (cfun
->machine
->far_jump_used
)
24146 /* If this function is not being called from the prologue/epilogue
24147 generation code then it must be being called from the
24148 INITIAL_ELIMINATION_OFFSET macro. */
24149 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24151 /* In this case we know that we are being asked about the elimination
24152 of the arg pointer register. If that register is not being used,
24153 then there are no arguments on the stack, and we do not have to
24154 worry that a far jump might force the prologue to push the link
24155 register, changing the stack offsets. In this case we can just
24156 return false, since the presence of far jumps in the function will
24157 not affect stack offsets.
24159 If the arg pointer is live (or if it was live, but has now been
24160 eliminated and so set to dead) then we do have to test to see if
24161 the function might contain a far jump. This test can lead to some
24162 false negatives, since before reload is completed, then length of
24163 branch instructions is not known, so gcc defaults to returning their
24164 longest length, which in turn sets the far jump attribute to true.
24166 A false negative will not result in bad code being generated, but it
24167 will result in a needless push and pop of the link register. We
24168 hope that this does not occur too often.
24170 If we need doubleword stack alignment this could affect the other
24171 elimination offsets so we can't risk getting it wrong. */
24172 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24173 cfun
->machine
->arg_pointer_live
= 1;
24174 else if (!cfun
->machine
->arg_pointer_live
)
24178 /* We should not change far_jump_used during or after reload, as there is
24179 no chance to change stack frame layout. */
24180 if (reload_in_progress
|| reload_completed
)
24183 /* Check to see if the function contains a branch
24184 insn with the far jump attribute set. */
24185 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24187 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24191 func_size
+= get_attr_length (insn
);
24194 /* Attribute far_jump will always be true for thumb1 before
24195 shorten_branch pass. So checking far_jump attribute before
24196 shorten_branch isn't much useful.
24198 Following heuristic tries to estimate more accurately if a far jump
24199 may finally be used. The heuristic is very conservative as there is
24200 no chance to roll-back the decision of not to use far jump.
24202 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24203 2-byte insn is associated with a 4 byte constant pool. Using
24204 function size 2048/3 as the threshold is conservative enough. */
24207 if ((func_size
* 3) >= 2048)
24209 /* Record the fact that we have decided that
24210 the function does use far jumps. */
24211 cfun
->machine
->far_jump_used
= 1;
24219 /* Return nonzero if FUNC must be entered in ARM mode. */
24221 is_called_in_ARM_mode (tree func
)
24223 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24225 /* Ignore the problem about functions whose address is taken. */
24226 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24230 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24236 /* Given the stack offsets and register mask in OFFSETS, decide how
24237 many additional registers to push instead of subtracting a constant
24238 from SP. For epilogues the principle is the same except we use pop.
24239 FOR_PROLOGUE indicates which we're generating. */
24241 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24243 HOST_WIDE_INT amount
;
24244 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24245 /* Extract a mask of the ones we can give to the Thumb's push/pop
24247 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24248 /* Then count how many other high registers will need to be pushed. */
24249 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24250 int n_free
, reg_base
, size
;
24252 if (!for_prologue
&& frame_pointer_needed
)
24253 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24255 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24257 /* If the stack frame size is 512 exactly, we can save one load
24258 instruction, which should make this a win even when optimizing
24260 if (!optimize_size
&& amount
!= 512)
24263 /* Can't do this if there are high registers to push. */
24264 if (high_regs_pushed
!= 0)
24267 /* Shouldn't do it in the prologue if no registers would normally
24268 be pushed at all. In the epilogue, also allow it if we'll have
24269 a pop insn for the PC. */
24272 || TARGET_BACKTRACE
24273 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24274 || TARGET_INTERWORK
24275 || crtl
->args
.pretend_args_size
!= 0))
24278 /* Don't do this if thumb_expand_prologue wants to emit instructions
24279 between the push and the stack frame allocation. */
24281 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24282 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24289 size
= arm_size_return_regs ();
24290 reg_base
= ARM_NUM_INTS (size
);
24291 live_regs_mask
>>= reg_base
;
24294 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24295 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24297 live_regs_mask
>>= 1;
24303 gcc_assert (amount
/ 4 * 4 == amount
);
24305 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24306 return (amount
- 508) / 4;
24307 if (amount
<= n_free
* 4)
24312 /* The bits which aren't usefully expanded as rtl. */
24314 thumb1_unexpanded_epilogue (void)
24316 arm_stack_offsets
*offsets
;
24318 unsigned long live_regs_mask
= 0;
24319 int high_regs_pushed
= 0;
24321 int had_to_push_lr
;
24324 if (cfun
->machine
->return_used_this_function
!= 0)
24327 if (IS_NAKED (arm_current_func_type ()))
24330 offsets
= arm_get_frame_offsets ();
24331 live_regs_mask
= offsets
->saved_regs_mask
;
24332 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24334 /* If we can deduce the registers used from the function's return value.
24335 This is more reliable that examining df_regs_ever_live_p () because that
24336 will be set if the register is ever used in the function, not just if
24337 the register is used to hold a return value. */
24338 size
= arm_size_return_regs ();
24340 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24343 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24344 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24347 /* The prolog may have pushed some high registers to use as
24348 work registers. e.g. the testsuite file:
24349 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24350 compiles to produce:
24351 push {r4, r5, r6, r7, lr}
24355 as part of the prolog. We have to undo that pushing here. */
24357 if (high_regs_pushed
)
24359 unsigned long mask
= live_regs_mask
& 0xff;
24362 /* The available low registers depend on the size of the value we are
24370 /* Oh dear! We have no low registers into which we can pop
24373 ("no low registers available for popping high registers");
24375 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24376 if (live_regs_mask
& (1 << next_hi_reg
))
24379 while (high_regs_pushed
)
24381 /* Find lo register(s) into which the high register(s) can
24383 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24385 if (mask
& (1 << regno
))
24386 high_regs_pushed
--;
24387 if (high_regs_pushed
== 0)
24391 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24393 /* Pop the values into the low register(s). */
24394 thumb_pop (asm_out_file
, mask
);
24396 /* Move the value(s) into the high registers. */
24397 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24399 if (mask
& (1 << regno
))
24401 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24404 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24405 if (live_regs_mask
& (1 << next_hi_reg
))
24410 live_regs_mask
&= ~0x0f00;
24413 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24414 live_regs_mask
&= 0xff;
24416 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24418 /* Pop the return address into the PC. */
24419 if (had_to_push_lr
)
24420 live_regs_mask
|= 1 << PC_REGNUM
;
24422 /* Either no argument registers were pushed or a backtrace
24423 structure was created which includes an adjusted stack
24424 pointer, so just pop everything. */
24425 if (live_regs_mask
)
24426 thumb_pop (asm_out_file
, live_regs_mask
);
24428 /* We have either just popped the return address into the
24429 PC or it is was kept in LR for the entire function.
24430 Note that thumb_pop has already called thumb_exit if the
24431 PC was in the list. */
24432 if (!had_to_push_lr
)
24433 thumb_exit (asm_out_file
, LR_REGNUM
);
24437 /* Pop everything but the return address. */
24438 if (live_regs_mask
)
24439 thumb_pop (asm_out_file
, live_regs_mask
);
24441 if (had_to_push_lr
)
24445 /* We have no free low regs, so save one. */
24446 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24450 /* Get the return address into a temporary register. */
24451 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24455 /* Move the return address to lr. */
24456 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24458 /* Restore the low register. */
24459 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24464 regno
= LAST_ARG_REGNUM
;
24469 /* Remove the argument registers that were pushed onto the stack. */
24470 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24471 SP_REGNUM
, SP_REGNUM
,
24472 crtl
->args
.pretend_args_size
);
24474 thumb_exit (asm_out_file
, regno
);
24480 /* Functions to save and restore machine-specific function data. */
24481 static struct machine_function
*
24482 arm_init_machine_status (void)
24484 struct machine_function
*machine
;
24485 machine
= ggc_cleared_alloc
<machine_function
> ();
24487 #if ARM_FT_UNKNOWN != 0
24488 machine
->func_type
= ARM_FT_UNKNOWN
;
24493 /* Return an RTX indicating where the return address to the
24494 calling function can be found. */
24496 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24501 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24504 /* Do anything needed before RTL is emitted for each function. */
24506 arm_init_expanders (void)
24508 /* Arrange to initialize and mark the machine per-function status. */
24509 init_machine_status
= arm_init_machine_status
;
24511 /* This is to stop the combine pass optimizing away the alignment
24512 adjustment of va_arg. */
24513 /* ??? It is claimed that this should not be necessary. */
24515 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
24518 /* Check that FUNC is called with a different mode. */
24521 arm_change_mode_p (tree func
)
24523 if (TREE_CODE (func
) != FUNCTION_DECL
)
24526 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24529 callee_tree
= target_option_default_node
;
24531 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24532 int flags
= callee_opts
->x_target_flags
;
24534 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
24537 /* Like arm_compute_initial_elimination offset. Simpler because there
24538 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24539 to point at the base of the local variables after static stack
24540 space for a function has been allocated. */
24543 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24545 arm_stack_offsets
*offsets
;
24547 offsets
= arm_get_frame_offsets ();
24551 case ARG_POINTER_REGNUM
:
24554 case STACK_POINTER_REGNUM
:
24555 return offsets
->outgoing_args
- offsets
->saved_args
;
24557 case FRAME_POINTER_REGNUM
:
24558 return offsets
->soft_frame
- offsets
->saved_args
;
24560 case ARM_HARD_FRAME_POINTER_REGNUM
:
24561 return offsets
->saved_regs
- offsets
->saved_args
;
24563 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24564 return offsets
->locals_base
- offsets
->saved_args
;
24567 gcc_unreachable ();
24571 case FRAME_POINTER_REGNUM
:
24574 case STACK_POINTER_REGNUM
:
24575 return offsets
->outgoing_args
- offsets
->soft_frame
;
24577 case ARM_HARD_FRAME_POINTER_REGNUM
:
24578 return offsets
->saved_regs
- offsets
->soft_frame
;
24580 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24581 return offsets
->locals_base
- offsets
->soft_frame
;
24584 gcc_unreachable ();
24589 gcc_unreachable ();
24593 /* Generate the function's prologue. */
24596 thumb1_expand_prologue (void)
24600 HOST_WIDE_INT amount
;
24601 HOST_WIDE_INT size
;
24602 arm_stack_offsets
*offsets
;
24603 unsigned long func_type
;
24605 unsigned long live_regs_mask
;
24606 unsigned long l_mask
;
24607 unsigned high_regs_pushed
= 0;
24608 bool lr_needs_saving
;
24610 func_type
= arm_current_func_type ();
24612 /* Naked functions don't have prologues. */
24613 if (IS_NAKED (func_type
))
24615 if (flag_stack_usage_info
)
24616 current_function_static_stack_size
= 0;
24620 if (IS_INTERRUPT (func_type
))
24622 error ("interrupt Service Routines cannot be coded in Thumb mode");
24626 if (is_called_in_ARM_mode (current_function_decl
))
24627 emit_insn (gen_prologue_thumb1_interwork ());
24629 offsets
= arm_get_frame_offsets ();
24630 live_regs_mask
= offsets
->saved_regs_mask
;
24631 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
24633 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24634 l_mask
= live_regs_mask
& 0x40ff;
24635 /* Then count how many other high registers will need to be pushed. */
24636 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24638 if (crtl
->args
.pretend_args_size
)
24640 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24642 if (cfun
->machine
->uses_anonymous_args
)
24644 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24645 unsigned long mask
;
24647 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24648 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24650 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24654 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24655 stack_pointer_rtx
, x
));
24657 RTX_FRAME_RELATED_P (insn
) = 1;
24660 if (TARGET_BACKTRACE
)
24662 HOST_WIDE_INT offset
= 0;
24663 unsigned work_register
;
24664 rtx work_reg
, x
, arm_hfp_rtx
;
24666 /* We have been asked to create a stack backtrace structure.
24667 The code looks like this:
24671 0 sub SP, #16 Reserve space for 4 registers.
24672 2 push {R7} Push low registers.
24673 4 add R7, SP, #20 Get the stack pointer before the push.
24674 6 str R7, [SP, #8] Store the stack pointer
24675 (before reserving the space).
24676 8 mov R7, PC Get hold of the start of this code + 12.
24677 10 str R7, [SP, #16] Store it.
24678 12 mov R7, FP Get hold of the current frame pointer.
24679 14 str R7, [SP, #4] Store it.
24680 16 mov R7, LR Get hold of the current return address.
24681 18 str R7, [SP, #12] Store it.
24682 20 add R7, SP, #16 Point at the start of the
24683 backtrace structure.
24684 22 mov FP, R7 Put this value into the frame pointer. */
24686 work_register
= thumb_find_work_register (live_regs_mask
);
24687 work_reg
= gen_rtx_REG (SImode
, work_register
);
24688 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24690 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24691 stack_pointer_rtx
, GEN_INT (-16)));
24692 RTX_FRAME_RELATED_P (insn
) = 1;
24696 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24697 RTX_FRAME_RELATED_P (insn
) = 1;
24698 lr_needs_saving
= false;
24700 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24703 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24704 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24706 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24707 x
= gen_frame_mem (SImode
, x
);
24708 emit_move_insn (x
, work_reg
);
24710 /* Make sure that the instruction fetching the PC is in the right place
24711 to calculate "start of backtrace creation code + 12". */
24712 /* ??? The stores using the common WORK_REG ought to be enough to
24713 prevent the scheduler from doing anything weird. Failing that
24714 we could always move all of the following into an UNSPEC_VOLATILE. */
24717 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24718 emit_move_insn (work_reg
, x
);
24720 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24721 x
= gen_frame_mem (SImode
, x
);
24722 emit_move_insn (x
, work_reg
);
24724 emit_move_insn (work_reg
, arm_hfp_rtx
);
24726 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24727 x
= gen_frame_mem (SImode
, x
);
24728 emit_move_insn (x
, work_reg
);
24732 emit_move_insn (work_reg
, arm_hfp_rtx
);
24734 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24735 x
= gen_frame_mem (SImode
, x
);
24736 emit_move_insn (x
, work_reg
);
24738 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24739 emit_move_insn (work_reg
, x
);
24741 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24742 x
= gen_frame_mem (SImode
, x
);
24743 emit_move_insn (x
, work_reg
);
24746 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24747 emit_move_insn (work_reg
, x
);
24749 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24750 x
= gen_frame_mem (SImode
, x
);
24751 emit_move_insn (x
, work_reg
);
24753 x
= GEN_INT (offset
+ 12);
24754 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24756 emit_move_insn (arm_hfp_rtx
, work_reg
);
24758 /* Optimization: If we are not pushing any low registers but we are going
24759 to push some high registers then delay our first push. This will just
24760 be a push of LR and we can combine it with the push of the first high
24762 else if ((l_mask
& 0xff) != 0
24763 || (high_regs_pushed
== 0 && lr_needs_saving
))
24765 unsigned long mask
= l_mask
;
24766 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24767 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24768 RTX_FRAME_RELATED_P (insn
) = 1;
24769 lr_needs_saving
= false;
24772 if (high_regs_pushed
)
24774 unsigned pushable_regs
;
24775 unsigned next_hi_reg
;
24776 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24777 : crtl
->args
.info
.nregs
;
24778 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24780 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24781 if (live_regs_mask
& (1 << next_hi_reg
))
24784 /* Here we need to mask out registers used for passing arguments
24785 even if they can be pushed. This is to avoid using them to stash the high
24786 registers. Such kind of stash may clobber the use of arguments. */
24787 pushable_regs
= l_mask
& (~arg_regs_mask
);
24788 if (lr_needs_saving
)
24789 pushable_regs
&= ~(1 << LR_REGNUM
);
24791 if (pushable_regs
== 0)
24792 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24794 while (high_regs_pushed
> 0)
24796 unsigned long real_regs_mask
= 0;
24797 unsigned long push_mask
= 0;
24799 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
24801 if (pushable_regs
& (1 << regno
))
24803 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24804 gen_rtx_REG (SImode
, next_hi_reg
));
24806 high_regs_pushed
--;
24807 real_regs_mask
|= (1 << next_hi_reg
);
24808 push_mask
|= (1 << regno
);
24810 if (high_regs_pushed
)
24812 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24814 if (live_regs_mask
& (1 << next_hi_reg
))
24822 /* If we had to find a work register and we have not yet
24823 saved the LR then add it to the list of regs to push. */
24824 if (lr_needs_saving
)
24826 push_mask
|= 1 << LR_REGNUM
;
24827 real_regs_mask
|= 1 << LR_REGNUM
;
24828 lr_needs_saving
= false;
24831 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
24832 RTX_FRAME_RELATED_P (insn
) = 1;
24836 /* Load the pic register before setting the frame pointer,
24837 so we can use r7 as a temporary work register. */
24838 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24839 arm_load_pic_register (live_regs_mask
);
24841 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24842 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24843 stack_pointer_rtx
);
24845 size
= offsets
->outgoing_args
- offsets
->saved_args
;
24846 if (flag_stack_usage_info
)
24847 current_function_static_stack_size
= size
;
24849 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24850 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
24851 sorry ("-fstack-check=specific for Thumb-1");
24853 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24854 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
24859 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24860 GEN_INT (- amount
)));
24861 RTX_FRAME_RELATED_P (insn
) = 1;
24867 /* The stack decrement is too big for an immediate value in a single
24868 insn. In theory we could issue multiple subtracts, but after
24869 three of them it becomes more space efficient to place the full
24870 value in the constant pool and load into a register. (Also the
24871 ARM debugger really likes to see only one stack decrement per
24872 function). So instead we look for a scratch register into which
24873 we can load the decrement, and then we subtract this from the
24874 stack pointer. Unfortunately on the thumb the only available
24875 scratch registers are the argument registers, and we cannot use
24876 these as they may hold arguments to the function. Instead we
24877 attempt to locate a call preserved register which is used by this
24878 function. If we can find one, then we know that it will have
24879 been pushed at the start of the prologue and so we can corrupt
24881 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
24882 if (live_regs_mask
& (1 << regno
))
24885 gcc_assert(regno
<= LAST_LO_REGNUM
);
24887 reg
= gen_rtx_REG (SImode
, regno
);
24889 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
24891 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24892 stack_pointer_rtx
, reg
));
24894 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
24895 plus_constant (Pmode
, stack_pointer_rtx
,
24897 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
24898 RTX_FRAME_RELATED_P (insn
) = 1;
24902 if (frame_pointer_needed
)
24903 thumb_set_frame_pointer (offsets
);
24905 /* If we are profiling, make sure no instructions are scheduled before
24906 the call to mcount. Similarly if the user has requested no
24907 scheduling in the prolog. Similarly if we want non-call exceptions
24908 using the EABI unwinder, to prevent faulting instructions from being
24909 swapped with a stack adjustment. */
24910 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
24911 || (arm_except_unwind_info (&global_options
) == UI_TARGET
24912 && cfun
->can_throw_non_call_exceptions
))
24913 emit_insn (gen_blockage ());
24915 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
24916 if (live_regs_mask
& 0xff)
24917 cfun
->machine
->lr_save_eliminated
= 0;
24920 /* Clear caller saved registers not used to pass return values and leaked
24921 condition flags before exiting a cmse_nonsecure_entry function. */
24924 cmse_nonsecure_entry_clear_before_return (void)
24926 uint64_t to_clear_mask
[2];
24927 uint32_t padding_bits_to_clear
= 0;
24928 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
;
24929 int regno
, maxregno
= IP_REGNUM
;
24933 to_clear_mask
[0] = (1ULL << (NUM_ARG_REGS
)) - 1;
24934 to_clear_mask
[0] |= (1ULL << IP_REGNUM
);
24936 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
24937 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
24938 to make sure the instructions used to clear them are present. */
24939 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
24941 uint64_t float_mask
= (1ULL << (D7_VFP_REGNUM
+ 1)) - 1;
24942 maxregno
= LAST_VFP_REGNUM
;
24944 float_mask
&= ~((1ULL << FIRST_VFP_REGNUM
) - 1);
24945 to_clear_mask
[0] |= float_mask
;
24947 float_mask
= (1ULL << (maxregno
- 63)) - 1;
24948 to_clear_mask
[1] = float_mask
;
24950 /* Make sure we don't clear the two scratch registers used to clear the
24951 relevant FPSCR bits in output_return_instruction. */
24952 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
24953 to_clear_mask
[0] &= ~(1ULL << IP_REGNUM
);
24954 emit_use (gen_rtx_REG (SImode
, 4));
24955 to_clear_mask
[0] &= ~(1ULL << 4);
24958 /* If the user has defined registers to be caller saved, these are no longer
24959 restored by the function before returning and must thus be cleared for
24960 security purposes. */
24961 for (regno
= NUM_ARG_REGS
; regno
< LAST_VFP_REGNUM
; regno
++)
24963 /* We do not touch registers that can be used to pass arguments as per
24964 the AAPCS, since these should never be made callee-saved by user
24966 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
24968 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
24970 if (call_used_regs
[regno
])
24971 to_clear_mask
[regno
/ 64] |= (1ULL << (regno
% 64));
24974 /* Make sure we do not clear the registers used to return the result in. */
24975 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
24976 if (!VOID_TYPE_P (result_type
))
24978 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
24980 /* No need to check that we return in registers, because we don't
24981 support returning on stack yet. */
24983 &= ~compute_not_to_clear_mask (result_type
, result_rtl
, 0,
24984 padding_bits_to_clear_ptr
);
24987 if (padding_bits_to_clear
!= 0)
24990 /* Padding bits to clear is not 0 so we know we are dealing with
24991 returning a composite type, which only uses r0. Let's make sure that
24992 r1-r3 is cleared too, we will use r1 as a scratch register. */
24993 gcc_assert ((to_clear_mask
[0] & 0xe) == 0xe);
24995 reg_rtx
= gen_rtx_REG (SImode
, R1_REGNUM
);
24997 /* Fill the lower half of the negated padding_bits_to_clear. */
24998 emit_move_insn (reg_rtx
,
24999 GEN_INT ((((~padding_bits_to_clear
) << 16u) >> 16u)));
25001 /* Also fill the top half of the negated padding_bits_to_clear. */
25002 if (((~padding_bits_to_clear
) >> 16) > 0)
25003 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg_rtx
,
25006 GEN_INT ((~padding_bits_to_clear
) >> 16)));
25008 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, R0_REGNUM
),
25009 gen_rtx_REG (SImode
, R0_REGNUM
),
25013 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
25015 if (!(to_clear_mask
[regno
/ 64] & (1ULL << (regno
% 64))))
25018 if (IS_VFP_REGNUM (regno
))
25020 /* If regno is an even vfp register and its successor is also to
25021 be cleared, use vmov. */
25022 if (TARGET_VFP_DOUBLE
25023 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
25024 && to_clear_mask
[regno
/ 64] & (1ULL << ((regno
% 64) + 1)))
25026 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
25027 CONST1_RTX (DFmode
));
25028 emit_use (gen_rtx_REG (DFmode
, regno
));
25033 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
25034 CONST1_RTX (SFmode
));
25035 emit_use (gen_rtx_REG (SFmode
, regno
));
25042 if (regno
== R0_REGNUM
)
25043 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25046 /* R0 has either been cleared before, see code above, or it
25047 holds a return value, either way it is not secret
25049 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25050 gen_rtx_REG (SImode
, R0_REGNUM
));
25051 emit_use (gen_rtx_REG (SImode
, regno
));
25055 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25056 gen_rtx_REG (SImode
, LR_REGNUM
));
25057 emit_use (gen_rtx_REG (SImode
, regno
));
25063 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25064 POP instruction can be generated. LR should be replaced by PC. All
25065 the checks required are already done by USE_RETURN_INSN (). Hence,
25066 all we really need to check here is if single register is to be
25067 returned, or multiple register return. */
25069 thumb2_expand_return (bool simple_return
)
25072 unsigned long saved_regs_mask
;
25073 arm_stack_offsets
*offsets
;
25075 offsets
= arm_get_frame_offsets ();
25076 saved_regs_mask
= offsets
->saved_regs_mask
;
25078 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25079 if (saved_regs_mask
& (1 << i
))
25082 if (!simple_return
&& saved_regs_mask
)
25084 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25085 functions or adapt code to handle according to ACLE. This path should
25086 not be reachable for cmse_nonsecure_entry functions though we prefer
25087 to assert it for now to ensure that future code changes do not silently
25088 change this behavior. */
25089 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25092 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25093 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25094 rtx addr
= gen_rtx_MEM (SImode
,
25095 gen_rtx_POST_INC (SImode
,
25096 stack_pointer_rtx
));
25097 set_mem_alias_set (addr
, get_frame_alias_set ());
25098 XVECEXP (par
, 0, 0) = ret_rtx
;
25099 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25100 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25101 emit_jump_insn (par
);
25105 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25106 saved_regs_mask
|= (1 << PC_REGNUM
);
25107 arm_emit_multi_reg_pop (saved_regs_mask
);
25112 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25113 cmse_nonsecure_entry_clear_before_return ();
25114 emit_jump_insn (simple_return_rtx
);
25119 thumb1_expand_epilogue (void)
25121 HOST_WIDE_INT amount
;
25122 arm_stack_offsets
*offsets
;
25125 /* Naked functions don't have prologues. */
25126 if (IS_NAKED (arm_current_func_type ()))
25129 offsets
= arm_get_frame_offsets ();
25130 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25132 if (frame_pointer_needed
)
25134 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25135 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25137 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25139 gcc_assert (amount
>= 0);
25142 emit_insn (gen_blockage ());
25145 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25146 GEN_INT (amount
)));
25149 /* r3 is always free in the epilogue. */
25150 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25152 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25153 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25157 /* Emit a USE (stack_pointer_rtx), so that
25158 the stack adjustment will not be deleted. */
25159 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25161 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25162 emit_insn (gen_blockage ());
25164 /* Emit a clobber for each insn that will be restored in the epilogue,
25165 so that flow2 will get register lifetimes correct. */
25166 for (regno
= 0; regno
< 13; regno
++)
25167 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25168 emit_clobber (gen_rtx_REG (SImode
, regno
));
25170 if (! df_regs_ever_live_p (LR_REGNUM
))
25171 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25173 /* Clear all caller-saved regs that are not used to return. */
25174 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25175 cmse_nonsecure_entry_clear_before_return ();
25178 /* Epilogue code for APCS frame. */
25180 arm_expand_epilogue_apcs_frame (bool really_return
)
25182 unsigned long func_type
;
25183 unsigned long saved_regs_mask
;
25186 int floats_from_frame
= 0;
25187 arm_stack_offsets
*offsets
;
25189 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25190 func_type
= arm_current_func_type ();
25192 /* Get frame offsets for ARM. */
25193 offsets
= arm_get_frame_offsets ();
25194 saved_regs_mask
= offsets
->saved_regs_mask
;
25196 /* Find the offset of the floating-point save area in the frame. */
25198 = (offsets
->saved_args
25199 + arm_compute_static_chain_stack_bytes ()
25202 /* Compute how many core registers saved and how far away the floats are. */
25203 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25204 if (saved_regs_mask
& (1 << i
))
25207 floats_from_frame
+= 4;
25210 if (TARGET_HARD_FLOAT
)
25213 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25215 /* The offset is from IP_REGNUM. */
25216 int saved_size
= arm_get_vfp_saved_size ();
25217 if (saved_size
> 0)
25220 floats_from_frame
+= saved_size
;
25221 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25222 hard_frame_pointer_rtx
,
25223 GEN_INT (-floats_from_frame
)));
25224 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25225 ip_rtx
, hard_frame_pointer_rtx
);
25228 /* Generate VFP register multi-pop. */
25229 start_reg
= FIRST_VFP_REGNUM
;
25231 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25232 /* Look for a case where a reg does not need restoring. */
25233 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25234 && (!df_regs_ever_live_p (i
+ 1)
25235 || call_used_regs
[i
+ 1]))
25237 if (start_reg
!= i
)
25238 arm_emit_vfp_multi_reg_pop (start_reg
,
25239 (i
- start_reg
) / 2,
25240 gen_rtx_REG (SImode
,
25245 /* Restore the remaining regs that we have discovered (or possibly
25246 even all of them, if the conditional in the for loop never
25248 if (start_reg
!= i
)
25249 arm_emit_vfp_multi_reg_pop (start_reg
,
25250 (i
- start_reg
) / 2,
25251 gen_rtx_REG (SImode
, IP_REGNUM
));
25256 /* The frame pointer is guaranteed to be non-double-word aligned, as
25257 it is set to double-word-aligned old_stack_pointer - 4. */
25259 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
25261 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25262 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25264 rtx addr
= gen_frame_mem (V2SImode
,
25265 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25267 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25268 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25269 gen_rtx_REG (V2SImode
, i
),
25275 /* saved_regs_mask should contain IP which contains old stack pointer
25276 at the time of activation creation. Since SP and IP are adjacent registers,
25277 we can restore the value directly into SP. */
25278 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25279 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25280 saved_regs_mask
|= (1 << SP_REGNUM
);
25282 /* There are two registers left in saved_regs_mask - LR and PC. We
25283 only need to restore LR (the return address), but to
25284 save time we can load it directly into PC, unless we need a
25285 special function exit sequence, or we are not really returning. */
25287 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25288 && !crtl
->calls_eh_return
)
25289 /* Delete LR from the register mask, so that LR on
25290 the stack is loaded into the PC in the register mask. */
25291 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25293 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25295 num_regs
= bit_count (saved_regs_mask
);
25296 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25299 emit_insn (gen_blockage ());
25300 /* Unwind the stack to just below the saved registers. */
25301 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25302 hard_frame_pointer_rtx
,
25303 GEN_INT (- 4 * num_regs
)));
25305 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25306 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25309 arm_emit_multi_reg_pop (saved_regs_mask
);
25311 if (IS_INTERRUPT (func_type
))
25313 /* Interrupt handlers will have pushed the
25314 IP onto the stack, so restore it now. */
25316 rtx addr
= gen_rtx_MEM (SImode
,
25317 gen_rtx_POST_INC (SImode
,
25318 stack_pointer_rtx
));
25319 set_mem_alias_set (addr
, get_frame_alias_set ());
25320 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25321 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25322 gen_rtx_REG (SImode
, IP_REGNUM
),
25326 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25329 if (crtl
->calls_eh_return
)
25330 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25332 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25334 if (IS_STACKALIGN (func_type
))
25335 /* Restore the original stack pointer. Before prologue, the stack was
25336 realigned and the original stack pointer saved in r0. For details,
25337 see comment in arm_expand_prologue. */
25338 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25340 emit_jump_insn (simple_return_rtx
);
25343 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25344 function is not a sibcall. */
25346 arm_expand_epilogue (bool really_return
)
25348 unsigned long func_type
;
25349 unsigned long saved_regs_mask
;
25353 arm_stack_offsets
*offsets
;
25355 func_type
= arm_current_func_type ();
25357 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25358 let output_return_instruction take care of instruction emission if any. */
25359 if (IS_NAKED (func_type
)
25360 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25363 emit_jump_insn (simple_return_rtx
);
25367 /* If we are throwing an exception, then we really must be doing a
25368 return, so we can't tail-call. */
25369 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
25371 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25373 arm_expand_epilogue_apcs_frame (really_return
);
25377 /* Get frame offsets for ARM. */
25378 offsets
= arm_get_frame_offsets ();
25379 saved_regs_mask
= offsets
->saved_regs_mask
;
25380 num_regs
= bit_count (saved_regs_mask
);
25382 if (frame_pointer_needed
)
25385 /* Restore stack pointer if necessary. */
25388 /* In ARM mode, frame pointer points to first saved register.
25389 Restore stack pointer to last saved register. */
25390 amount
= offsets
->frame
- offsets
->saved_regs
;
25392 /* Force out any pending memory operations that reference stacked data
25393 before stack de-allocation occurs. */
25394 emit_insn (gen_blockage ());
25395 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25396 hard_frame_pointer_rtx
,
25397 GEN_INT (amount
)));
25398 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25400 hard_frame_pointer_rtx
);
25402 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25404 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25408 /* In Thumb-2 mode, the frame pointer points to the last saved
25410 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25413 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25414 hard_frame_pointer_rtx
,
25415 GEN_INT (amount
)));
25416 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25417 hard_frame_pointer_rtx
,
25418 hard_frame_pointer_rtx
);
25421 /* Force out any pending memory operations that reference stacked data
25422 before stack de-allocation occurs. */
25423 emit_insn (gen_blockage ());
25424 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25425 hard_frame_pointer_rtx
));
25426 arm_add_cfa_adjust_cfa_note (insn
, 0,
25428 hard_frame_pointer_rtx
);
25429 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25431 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25436 /* Pop off outgoing args and local frame to adjust stack pointer to
25437 last saved register. */
25438 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25442 /* Force out any pending memory operations that reference stacked data
25443 before stack de-allocation occurs. */
25444 emit_insn (gen_blockage ());
25445 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25447 GEN_INT (amount
)));
25448 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25449 stack_pointer_rtx
, stack_pointer_rtx
);
25450 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25452 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25456 if (TARGET_HARD_FLOAT
)
25458 /* Generate VFP register multi-pop. */
25459 int end_reg
= LAST_VFP_REGNUM
+ 1;
25461 /* Scan the registers in reverse order. We need to match
25462 any groupings made in the prologue and generate matching
25463 vldm operations. The need to match groups is because,
25464 unlike pop, vldm can only do consecutive regs. */
25465 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25466 /* Look for a case where a reg does not need restoring. */
25467 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25468 && (!df_regs_ever_live_p (i
+ 1)
25469 || call_used_regs
[i
+ 1]))
25471 /* Restore the regs discovered so far (from reg+2 to
25473 if (end_reg
> i
+ 2)
25474 arm_emit_vfp_multi_reg_pop (i
+ 2,
25475 (end_reg
- (i
+ 2)) / 2,
25476 stack_pointer_rtx
);
25480 /* Restore the remaining regs that we have discovered (or possibly
25481 even all of them, if the conditional in the for loop never
25483 if (end_reg
> i
+ 2)
25484 arm_emit_vfp_multi_reg_pop (i
+ 2,
25485 (end_reg
- (i
+ 2)) / 2,
25486 stack_pointer_rtx
);
25490 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25491 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25494 rtx addr
= gen_rtx_MEM (V2SImode
,
25495 gen_rtx_POST_INC (SImode
,
25496 stack_pointer_rtx
));
25497 set_mem_alias_set (addr
, get_frame_alias_set ());
25498 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25499 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25500 gen_rtx_REG (V2SImode
, i
),
25502 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25503 stack_pointer_rtx
, stack_pointer_rtx
);
25506 if (saved_regs_mask
)
25509 bool return_in_pc
= false;
25511 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25512 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25513 && !IS_CMSE_ENTRY (func_type
)
25514 && !IS_STACKALIGN (func_type
)
25516 && crtl
->args
.pretend_args_size
== 0
25517 && saved_regs_mask
& (1 << LR_REGNUM
)
25518 && !crtl
->calls_eh_return
)
25520 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25521 saved_regs_mask
|= (1 << PC_REGNUM
);
25522 return_in_pc
= true;
25525 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25527 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25528 if (saved_regs_mask
& (1 << i
))
25530 rtx addr
= gen_rtx_MEM (SImode
,
25531 gen_rtx_POST_INC (SImode
,
25532 stack_pointer_rtx
));
25533 set_mem_alias_set (addr
, get_frame_alias_set ());
25535 if (i
== PC_REGNUM
)
25537 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25538 XVECEXP (insn
, 0, 0) = ret_rtx
;
25539 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25541 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25542 insn
= emit_jump_insn (insn
);
25546 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25548 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25549 gen_rtx_REG (SImode
, i
),
25551 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25553 stack_pointer_rtx
);
25560 && current_tune
->prefer_ldrd_strd
25561 && !optimize_function_for_size_p (cfun
))
25564 thumb2_emit_ldrd_pop (saved_regs_mask
);
25565 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25566 arm_emit_ldrd_pop (saved_regs_mask
);
25568 arm_emit_multi_reg_pop (saved_regs_mask
);
25571 arm_emit_multi_reg_pop (saved_regs_mask
);
25579 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25583 rtx dwarf
= NULL_RTX
;
25585 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25587 GEN_INT (amount
)));
25589 RTX_FRAME_RELATED_P (tmp
) = 1;
25591 if (cfun
->machine
->uses_anonymous_args
)
25593 /* Restore pretend args. Refer arm_expand_prologue on how to save
25594 pretend_args in stack. */
25595 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25596 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25597 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25598 if (saved_regs_mask
& (1 << i
))
25600 rtx reg
= gen_rtx_REG (SImode
, i
);
25601 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25604 REG_NOTES (tmp
) = dwarf
;
25606 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25607 stack_pointer_rtx
, stack_pointer_rtx
);
25610 /* Clear all caller-saved regs that are not used to return. */
25611 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25613 /* CMSE_ENTRY always returns. */
25614 gcc_assert (really_return
);
25615 cmse_nonsecure_entry_clear_before_return ();
25618 if (!really_return
)
25621 if (crtl
->calls_eh_return
)
25622 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25624 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25626 if (IS_STACKALIGN (func_type
))
25627 /* Restore the original stack pointer. Before prologue, the stack was
25628 realigned and the original stack pointer saved in r0. For details,
25629 see comment in arm_expand_prologue. */
25630 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25632 emit_jump_insn (simple_return_rtx
);
25635 /* Implementation of insn prologue_thumb1_interwork. This is the first
25636 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25639 thumb1_output_interwork (void)
25642 FILE *f
= asm_out_file
;
25644 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25645 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25647 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25649 /* Generate code sequence to switch us into Thumb mode. */
25650 /* The .code 32 directive has already been emitted by
25651 ASM_DECLARE_FUNCTION_NAME. */
25652 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25653 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25655 /* Generate a label, so that the debugger will notice the
25656 change in instruction sets. This label is also used by
25657 the assembler to bypass the ARM code when this function
25658 is called from a Thumb encoded function elsewhere in the
25659 same file. Hence the definition of STUB_NAME here must
25660 agree with the definition in gas/config/tc-arm.c. */
25662 #define STUB_NAME ".real_start_of"
25664 fprintf (f
, "\t.code\t16\n");
25666 if (arm_dllexport_name_p (name
))
25667 name
= arm_strip_name_encoding (name
);
25669 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25670 fprintf (f
, "\t.thumb_func\n");
25671 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
25676 /* Handle the case of a double word load into a low register from
25677 a computed memory address. The computed address may involve a
25678 register which is overwritten by the load. */
25680 thumb_load_double_from_address (rtx
*operands
)
25688 gcc_assert (REG_P (operands
[0]));
25689 gcc_assert (MEM_P (operands
[1]));
25691 /* Get the memory address. */
25692 addr
= XEXP (operands
[1], 0);
25694 /* Work out how the memory address is computed. */
25695 switch (GET_CODE (addr
))
25698 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25700 if (REGNO (operands
[0]) == REGNO (addr
))
25702 output_asm_insn ("ldr\t%H0, %2", operands
);
25703 output_asm_insn ("ldr\t%0, %1", operands
);
25707 output_asm_insn ("ldr\t%0, %1", operands
);
25708 output_asm_insn ("ldr\t%H0, %2", operands
);
25713 /* Compute <address> + 4 for the high order load. */
25714 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25716 output_asm_insn ("ldr\t%0, %1", operands
);
25717 output_asm_insn ("ldr\t%H0, %2", operands
);
25721 arg1
= XEXP (addr
, 0);
25722 arg2
= XEXP (addr
, 1);
25724 if (CONSTANT_P (arg1
))
25725 base
= arg2
, offset
= arg1
;
25727 base
= arg1
, offset
= arg2
;
25729 gcc_assert (REG_P (base
));
25731 /* Catch the case of <address> = <reg> + <reg> */
25732 if (REG_P (offset
))
25734 int reg_offset
= REGNO (offset
);
25735 int reg_base
= REGNO (base
);
25736 int reg_dest
= REGNO (operands
[0]);
25738 /* Add the base and offset registers together into the
25739 higher destination register. */
25740 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25741 reg_dest
+ 1, reg_base
, reg_offset
);
25743 /* Load the lower destination register from the address in
25744 the higher destination register. */
25745 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25746 reg_dest
, reg_dest
+ 1);
25748 /* Load the higher destination register from its own address
25750 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25751 reg_dest
+ 1, reg_dest
+ 1);
25755 /* Compute <address> + 4 for the high order load. */
25756 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25758 /* If the computed address is held in the low order register
25759 then load the high order register first, otherwise always
25760 load the low order register first. */
25761 if (REGNO (operands
[0]) == REGNO (base
))
25763 output_asm_insn ("ldr\t%H0, %2", operands
);
25764 output_asm_insn ("ldr\t%0, %1", operands
);
25768 output_asm_insn ("ldr\t%0, %1", operands
);
25769 output_asm_insn ("ldr\t%H0, %2", operands
);
25775 /* With no registers to worry about we can just load the value
25777 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25779 output_asm_insn ("ldr\t%H0, %2", operands
);
25780 output_asm_insn ("ldr\t%0, %1", operands
);
25784 gcc_unreachable ();
25791 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
25796 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25797 std::swap (operands
[4], operands
[5]);
25799 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
25800 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
25804 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25805 std::swap (operands
[4], operands
[5]);
25806 if (REGNO (operands
[5]) > REGNO (operands
[6]))
25807 std::swap (operands
[5], operands
[6]);
25808 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25809 std::swap (operands
[4], operands
[5]);
25811 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
25812 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
25816 gcc_unreachable ();
25822 /* Output a call-via instruction for thumb state. */
25824 thumb_call_via_reg (rtx reg
)
25826 int regno
= REGNO (reg
);
25829 gcc_assert (regno
< LR_REGNUM
);
25831 /* If we are in the normal text section we can use a single instance
25832 per compilation unit. If we are doing function sections, then we need
25833 an entry per section, since we can't rely on reachability. */
25834 if (in_section
== text_section
)
25836 thumb_call_reg_needed
= 1;
25838 if (thumb_call_via_label
[regno
] == NULL
)
25839 thumb_call_via_label
[regno
] = gen_label_rtx ();
25840 labelp
= thumb_call_via_label
+ regno
;
25844 if (cfun
->machine
->call_via
[regno
] == NULL
)
25845 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
25846 labelp
= cfun
->machine
->call_via
+ regno
;
25849 output_asm_insn ("bl\t%a0", labelp
);
25853 /* Routines for generating rtl. */
25855 thumb_expand_movmemqi (rtx
*operands
)
25857 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
25858 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
25859 HOST_WIDE_INT len
= INTVAL (operands
[2]);
25860 HOST_WIDE_INT offset
= 0;
25864 emit_insn (gen_movmem12b (out
, in
, out
, in
));
25870 emit_insn (gen_movmem8b (out
, in
, out
, in
));
25876 rtx reg
= gen_reg_rtx (SImode
);
25877 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
25878 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
25885 rtx reg
= gen_reg_rtx (HImode
);
25886 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
25887 plus_constant (Pmode
, in
,
25889 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
25898 rtx reg
= gen_reg_rtx (QImode
);
25899 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
25900 plus_constant (Pmode
, in
,
25902 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
/* Expand helper for the HImode output-reload pattern: emit the
   thumb_movhi_clobber insn, which stores operands[1] into the memory
   destination operands[0] using operands[2] as a scratch register.
   NOTE(review): lossy extraction -- the return type, braces and any
   adjacent lines (orig. 25908/25910/25912) are missing from this
   chunk, so only comments are added here.  */
25909 thumb_reload_out_hi (rtx
*operands
)
25911 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
25914 /* Return the length of a function name prefix
25915 that starts with the character 'c'. */
25917 arm_get_strip_length (int c
)
25921 ARM_NAME_ENCODING_LENGTHS
25926 /* Return a pointer to a function's name with any
25927 and all prefix encodings stripped from it. */
25929 arm_strip_name_encoding (const char *name
)
25933 while ((skip
= arm_get_strip_length (* name
)))
25939 /* If there is a '*' anywhere in the name's prefix, then
25940 emit the stripped name verbatim, otherwise prepend an
25941 underscore if leading underscores are being used. */
25943 arm_asm_output_labelref (FILE *stream
, const char *name
)
25948 while ((skip
= arm_get_strip_length (* name
)))
25950 verbatim
|= (*name
== '*');
25955 fputs (name
, stream
);
25957 asm_fprintf (stream
, "%U%s", name
);
25960 /* This function is used to emit an EABI tag and its associated value.
25961 We emit the numerical value of the tag in case the assembler does not
25962 support textual tags. (Eg gas prior to 2.20). If requested we include
25963 the tag name in a comment so that anyone reading the assembler output
25964 will know which tag is being set.
25966 This function is not static because arm-c.c needs it too. */
/* Emit an EABI build-attribute directive: always print the numeric
   tag NUM with value VAL (so assemblers without textual-tag support,
   e.g. gas < 2.20, still work), and append the tag NAME as an
   assembler comment when -fverbose-asm or -dA is in effect.
   Not static: arm-c.c uses it too (see comment above in the file).
   NOTE(review): lossy extraction -- the return type and braces
   (orig. 25970 and the closing line) are missing; comments only.  */
25969 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
/* Numeric form of the tag, unconditionally.  */
25971 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
25972 if (flag_verbose_asm
|| flag_debug_asm
)
/* Human-readable tag name, behind the assembler comment prefix.  */
25973 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
25974 asm_fprintf (asm_out_file
, "\n");
25977 /* This function is used to print CPU tuning information as comment
25978 in assembler file. Pointers are not printed for now. */
25981 arm_print_tune_info (void)
25983 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
25984 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
25985 current_tune
->constant_limit
);
25986 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25987 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
25988 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25989 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
25990 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25991 "prefetch.l1_cache_size:\t%d\n",
25992 current_tune
->prefetch
.l1_cache_size
);
25993 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25994 "prefetch.l1_cache_line_size:\t%d\n",
25995 current_tune
->prefetch
.l1_cache_line_size
);
25996 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
25997 "prefer_constant_pool:\t%d\n",
25998 (int) current_tune
->prefer_constant_pool
);
25999 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26000 "branch_cost:\t(s:speed, p:predictable)\n");
26001 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
26002 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
26003 current_tune
->branch_cost (false, false));
26004 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
26005 current_tune
->branch_cost (false, true));
26006 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
26007 current_tune
->branch_cost (true, false));
26008 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
26009 current_tune
->branch_cost (true, true));
26010 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26011 "prefer_ldrd_strd:\t%d\n",
26012 (int) current_tune
->prefer_ldrd_strd
);
26013 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26014 "logical_op_non_short_circuit:\t[%d,%d]\n",
26015 (int) current_tune
->logical_op_non_short_circuit_thumb
,
26016 (int) current_tune
->logical_op_non_short_circuit_arm
);
26017 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26018 "prefer_neon_for_64bits:\t%d\n",
26019 (int) current_tune
->prefer_neon_for_64bits
);
26020 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26021 "disparage_flag_setting_t16_encodings:\t%d\n",
26022 (int) current_tune
->disparage_flag_setting_t16_encodings
);
26023 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26024 "string_ops_prefer_neon:\t%d\n",
26025 (int) current_tune
->string_ops_prefer_neon
);
26026 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26027 "max_insns_inline_memset:\t%d\n",
26028 current_tune
->max_insns_inline_memset
);
26029 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
26030 current_tune
->fusible_ops
);
26031 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
26032 (int) current_tune
->sched_autopref
);
26036 arm_file_start (void)
26042 /* We don't have a specified CPU. Use the architecture to
26045 Note: it might be better to do this unconditionally, then the
26046 assembler would not need to know about all new CPU names as
26048 if (!arm_active_target
.core_name
)
26050 /* armv7ve doesn't support any extensions. */
26051 if (strcmp (arm_active_target
.arch_name
, "armv7ve") == 0)
26053 /* Keep backward compatability for assemblers
26054 which don't support armv7ve. */
26055 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26056 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26057 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26058 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26059 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
26063 const char* pos
= strchr (arm_active_target
.arch_name
, '+');
26067 gcc_assert (strlen (arm_active_target
.arch_name
)
26068 <= sizeof (buf
) / sizeof (*pos
));
26069 strncpy (buf
, arm_active_target
.arch_name
,
26070 (pos
- arm_active_target
.arch_name
) * sizeof (*pos
));
26071 buf
[pos
- arm_active_target
.arch_name
] = '\0';
26072 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
26073 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
26076 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26077 arm_active_target
.arch_name
);
26080 else if (strncmp (arm_active_target
.core_name
, "generic", 7) == 0)
26081 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26082 arm_active_target
.core_name
+ 8);
26085 const char* truncated_name
26086 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
26087 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26090 if (print_tune_info
)
26091 arm_print_tune_info ();
26093 if (! TARGET_SOFT_FLOAT
)
26095 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26096 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26098 if (TARGET_HARD_FLOAT_ABI
)
26099 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26102 /* Some of these attributes only apply when the corresponding features
26103 are used. However we don't have any easy way of figuring this out.
26104 Conservatively record the setting that would have been used. */
26106 if (flag_rounding_math
)
26107 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26109 if (!flag_unsafe_math_optimizations
)
26111 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26112 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26114 if (flag_signaling_nans
)
26115 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26117 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26118 flag_finite_math_only
? 1 : 3);
26120 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26121 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26122 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26123 flag_short_enums
? 1 : 2);
26125 /* Tag_ABI_optimization_goals. */
26128 else if (optimize
>= 2)
26134 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26136 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26139 if (arm_fp16_format
)
26140 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26141 (int) arm_fp16_format
);
26143 if (arm_lang_output_object_attributes_hook
)
26144 arm_lang_output_object_attributes_hook();
26147 default_file_start ();
/* Target hook TARGET_ASM_FILE_END.  Emit the .note.GNU-stack section
   when the target requires an executable-stack marker, then -- if any
   Thumb call-via-register stubs were requested for the plain text
   section (thumb_call_reg_needed) -- emit one "bx <regno>" stub per
   register below LR, each preceded by its internal label.
   NOTE(review): lossy extraction -- several original lines are
   missing here (e.g. 26152-26154, 26158, 26160-26161, 26165, 26167,
   26169-26171), so the early return for !thumb_call_reg_needed and
   the per-register NULL-label skip are not visible; presumably they
   exist in the true source -- confirm against upstream.  Comments
   only; code bytes unchanged.  */
26151 arm_file_end (void)
26155 if (NEED_INDICATE_EXEC_STACK
)
26156 /* Add .note.GNU-stack. */
26157 file_end_indicate_exec_stack ();
26159 if (! thumb_call_reg_needed
)
/* Switch to .text and enter Thumb state for the stubs.  */
26162 switch_to_section (text_section
);
26163 asm_fprintf (asm_out_file
, "\t.code 16\n");
26164 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26166 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26168 rtx label
= thumb_call_via_label
[regno
];
/* Print the stub's internal label, then the branch-exchange body.  */
26172 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26173 CODE_LABEL_NUMBER (label
));
26174 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
26180 /* Symbols in the text segment can be accessed without indirecting via the
26181 constant pool; it may take an extra binary operation, but this is still
26182 faster than indirecting via memory. Don't do this when not optimizing,
26183 since we won't be calculating al of the offsets necessary to do this
/* Target hook TARGET_ENCODE_SECTION_INFO (non-PE variant; see the
   #endif !ARM_PE just below).  When optimizing, mark constant decls'
   SYMBOL_REFs with SYMBOL_REF_FLAG so text-segment symbols can be
   addressed without a constant-pool indirection (see the comment
   above in the file); then defer to the generic hook.
   NOTE(review): lossy extraction -- braces and intervening lines
   (orig. 26188/26191/26193) are missing; comments only.  */
26187 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
26189 if (optimize
> 0 && TREE_CONSTANT (decl
))
26190 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26192 default_encode_section_info (decl
, rtl
, first
);
26194 #endif /* !ARM_PE */
/* Target hook TARGET_ASM_INTERNAL_LABEL.  Before printing an internal
   "L" label, cancel the conditional-execution state machine when it
   is in state 3 and its recorded target label is exactly this label
   (a branch target must not land inside conditionalized code); then
   defer to the default label printer.
   NOTE(review): lossy extraction -- braces and intervening lines
   (orig. 26198/26201/26204/26206) are missing; comments only.  */
26197 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26199 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26200 && !strcmp (prefix
, "L"))
/* Reset the ccfsm so following insns are emitted unconditionally.  */
26202 arm_ccfsm_state
= 0;
26203 arm_target_insn
= NULL
;
26205 default_internal_label (stream
, prefix
, labelno
);
26208 /* Output code to add DELTA to the first argument, and then jump
26209 to FUNCTION. Used for C++ multiple inheritance. */
26212 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26213 HOST_WIDE_INT
, tree function
)
26215 static int thunk_label
= 0;
26218 int mi_delta
= delta
;
26219 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26221 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26224 mi_delta
= - mi_delta
;
26226 final_start_function (emit_barrier (), file
, 1);
26230 int labelno
= thunk_label
++;
26231 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26232 /* Thunks are entered in arm mode when avaiable. */
26233 if (TARGET_THUMB1_ONLY
)
26235 /* push r3 so we can use it as a temporary. */
26236 /* TODO: Omit this save if r3 is not used. */
26237 fputs ("\tpush {r3}\n", file
);
26238 fputs ("\tldr\tr3, ", file
);
26242 fputs ("\tldr\tr12, ", file
);
26244 assemble_name (file
, label
);
26245 fputc ('\n', file
);
26248 /* If we are generating PIC, the ldr instruction below loads
26249 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26250 the address of the add + 8, so we have:
26252 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26255 Note that we have "+ 1" because some versions of GNU ld
26256 don't set the low bit of the result for R_ARM_REL32
26257 relocations against thumb function symbols.
26258 On ARMv6M this is +4, not +8. */
26259 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26260 assemble_name (file
, labelpc
);
26261 fputs (":\n", file
);
26262 if (TARGET_THUMB1_ONLY
)
26264 /* This is 2 insns after the start of the thunk, so we know it
26265 is 4-byte aligned. */
26266 fputs ("\tadd\tr3, pc, r3\n", file
);
26267 fputs ("\tmov r12, r3\n", file
);
26270 fputs ("\tadd\tr12, pc, r12\n", file
);
26272 else if (TARGET_THUMB1_ONLY
)
26273 fputs ("\tmov r12, r3\n", file
);
26275 if (TARGET_THUMB1_ONLY
)
26277 if (mi_delta
> 255)
26279 fputs ("\tldr\tr3, ", file
);
26280 assemble_name (file
, label
);
26281 fputs ("+4\n", file
);
26282 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26283 mi_op
, this_regno
, this_regno
);
26285 else if (mi_delta
!= 0)
26287 /* Thumb1 unified syntax requires s suffix in instruction name when
26288 one of the operands is immediate. */
26289 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26290 mi_op
, this_regno
, this_regno
,
26296 /* TODO: Use movw/movt for large constants when available. */
26297 while (mi_delta
!= 0)
26299 if ((mi_delta
& (3 << shift
)) == 0)
26303 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26304 mi_op
, this_regno
, this_regno
,
26305 mi_delta
& (0xff << shift
));
26306 mi_delta
&= ~(0xff << shift
);
26313 if (TARGET_THUMB1_ONLY
)
26314 fputs ("\tpop\t{r3}\n", file
);
26316 fprintf (file
, "\tbx\tr12\n");
26317 ASM_OUTPUT_ALIGN (file
, 2);
26318 assemble_name (file
, label
);
26319 fputs (":\n", file
);
26322 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26323 rtx tem
= XEXP (DECL_RTL (function
), 0);
26324 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26325 pipeline offset is four rather than eight. Adjust the offset
26327 tem
= plus_constant (GET_MODE (tem
), tem
,
26328 TARGET_THUMB1_ONLY
? -3 : -7);
26329 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26331 gen_rtx_SYMBOL_REF (Pmode
,
26332 ggc_strdup (labelpc
)));
26333 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26336 /* Output ".word .LTHUNKn". */
26337 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26339 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26340 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26344 fputs ("\tb\t", file
);
26345 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26346 if (NEED_PLT_RELOC
)
26347 fputs ("(PLT)", file
);
26348 fputc ('\n', file
);
26351 final_end_function ();
26354 /* MI thunk handling for TARGET_32BIT. */
26357 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26358 HOST_WIDE_INT vcall_offset
, tree function
)
26360 /* On ARM, this_regno is R0 or R1 depending on
26361 whether the function returns an aggregate or not.
26363 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26365 ? R1_REGNUM
: R0_REGNUM
);
26367 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26368 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26369 reload_completed
= 1;
26370 emit_note (NOTE_INSN_PROLOGUE_END
);
26372 /* Add DELTA to THIS_RTX. */
26374 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26375 delta
, this_rtx
, this_rtx
, false);
26377 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26378 if (vcall_offset
!= 0)
26380 /* Load *THIS_RTX. */
26381 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26382 /* Compute *THIS_RTX + VCALL_OFFSET. */
26383 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26385 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26386 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26387 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26390 /* Generate a tail call to the target function. */
26391 if (!TREE_USED (function
))
26393 assemble_external (function
);
26394 TREE_USED (function
) = 1;
26396 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26397 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26398 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26399 SIBLING_CALL_P (insn
) = 1;
26401 insn
= get_insns ();
26402 shorten_branches (insn
);
26403 final_start_function (insn
, file
, 1);
26404 final (insn
, file
, 1);
26405 final_end_function ();
26407 /* Stop pretending this is a post-reload pass. */
26408 reload_completed
= 0;
26411 /* Output code to add DELTA to the first argument, and then jump
26412 to FUNCTION. Used for C++ multiple inheritance. */
26415 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
26416 HOST_WIDE_INT vcall_offset
, tree function
)
26419 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26421 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26425 arm_emit_vector_const (FILE *file
, rtx x
)
26428 const char * pattern
;
26430 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26432 switch (GET_MODE (x
))
26434 case V2SImode
: pattern
= "%08x"; break;
26435 case V4HImode
: pattern
= "%04x"; break;
26436 case V8QImode
: pattern
= "%02x"; break;
26437 default: gcc_unreachable ();
26440 fprintf (file
, "0x");
26441 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26445 element
= CONST_VECTOR_ELT (x
, i
);
26446 fprintf (file
, pattern
, INTVAL (element
));
26452 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26453 HFmode constant pool entries are actually loaded with ldr. */
26455 arm_emit_fp16_const (rtx c
)
26459 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
26460 if (WORDS_BIG_ENDIAN
)
26461 assemble_zeros (2);
26462 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
26463 if (!WORDS_BIG_ENDIAN
)
26464 assemble_zeros (2);
26468 arm_output_load_gr (rtx
*operands
)
26475 if (!MEM_P (operands
[1])
26476 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26477 || !REG_P (reg
= XEXP (sum
, 0))
26478 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26479 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26480 return "wldrw%?\t%0, %1";
26482 /* Fix up an out-of-range load of a GR register. */
26483 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26484 wcgr
= operands
[0];
26486 output_asm_insn ("ldr%?\t%0, %1", operands
);
26488 operands
[0] = wcgr
;
26490 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26491 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26496 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26498 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26499 named arg and all anonymous args onto the stack.
26500 XXX I know the prologue shouldn't be pushing registers, but it is faster
26504 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26508 int second_time ATTRIBUTE_UNUSED
)
26510 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26513 cfun
->machine
->uses_anonymous_args
= 1;
26514 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26516 nregs
= pcum
->aapcs_ncrn
;
26517 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
26521 nregs
= pcum
->nregs
;
26523 if (nregs
< NUM_ARG_REGS
)
26524 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26527 /* We can't rely on the caller doing the proper promotion when
26528 using APCS or ATPCS. */
26531 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26533 return !TARGET_AAPCS_BASED
;
26536 static machine_mode
26537 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26539 int *punsignedp ATTRIBUTE_UNUSED
,
26540 const_tree fntype ATTRIBUTE_UNUSED
,
26541 int for_return ATTRIBUTE_UNUSED
)
26543 if (GET_MODE_CLASS (mode
) == MODE_INT
26544 && GET_MODE_SIZE (mode
) < 4)
26550 /* AAPCS based ABIs use short enums by default. */
26553 arm_default_short_enums (void)
26555 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
26559 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26562 arm_align_anon_bitfield (void)
26564 return TARGET_AAPCS_BASED
;
26568 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26571 arm_cxx_guard_type (void)
26573 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26577 /* The EABI says test the least significant bit of a guard variable. */
26580 arm_cxx_guard_mask_bit (void)
26582 return TARGET_AAPCS_BASED
;
26586 /* The EABI specifies that all array cookies are 8 bytes long. */
26589 arm_get_cookie_size (tree type
)
26593 if (!TARGET_AAPCS_BASED
)
26594 return default_cxx_get_cookie_size (type
);
26596 size
= build_int_cst (sizetype
, 8);
26601 /* The EABI says that array cookies should also contain the element size. */
26604 arm_cookie_has_size (void)
26606 return TARGET_AAPCS_BASED
;
26610 /* The EABI says constructors and destructors should return a pointer to
26611 the object constructed/destroyed. */
26614 arm_cxx_cdtor_returns_this (void)
26616 return TARGET_AAPCS_BASED
;
26619 /* The EABI says that an inline function may never be the key
26623 arm_cxx_key_method_may_be_inline (void)
26625 return !TARGET_AAPCS_BASED
;
26629 arm_cxx_determine_class_data_visibility (tree decl
)
26631 if (!TARGET_AAPCS_BASED
26632 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26635 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26636 is exported. However, on systems without dynamic vague linkage,
26637 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26638 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26639 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26641 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26642 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26646 arm_cxx_class_data_always_comdat (void)
26648 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26649 vague linkage if the class has no key function. */
26650 return !TARGET_AAPCS_BASED
;
26654 /* The EABI says __aeabi_atexit should be used to register static
26658 arm_cxx_use_aeabi_atexit (void)
26660 return TARGET_AAPCS_BASED
;
26665 arm_set_return_address (rtx source
, rtx scratch
)
26667 arm_stack_offsets
*offsets
;
26668 HOST_WIDE_INT delta
;
26670 unsigned long saved_regs
;
26672 offsets
= arm_get_frame_offsets ();
26673 saved_regs
= offsets
->saved_regs_mask
;
26675 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26676 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26679 if (frame_pointer_needed
)
26680 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26683 /* LR will be the first saved register. */
26684 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26689 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26690 GEN_INT (delta
& ~4095)));
26695 addr
= stack_pointer_rtx
;
26697 addr
= plus_constant (Pmode
, addr
, delta
);
26699 /* The store needs to be marked as frame related in order to prevent
26700 DSE from deleting it as dead if it is based on fp. */
26701 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26702 RTX_FRAME_RELATED_P (insn
) = 1;
26703 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26709 thumb_set_return_address (rtx source
, rtx scratch
)
26711 arm_stack_offsets
*offsets
;
26712 HOST_WIDE_INT delta
;
26713 HOST_WIDE_INT limit
;
26716 unsigned long mask
;
26720 offsets
= arm_get_frame_offsets ();
26721 mask
= offsets
->saved_regs_mask
;
26722 if (mask
& (1 << LR_REGNUM
))
26725 /* Find the saved regs. */
26726 if (frame_pointer_needed
)
26728 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26729 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26735 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26738 /* Allow for the stack frame. */
26739 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26741 /* The link register is always the first saved register. */
26744 /* Construct the address. */
26745 addr
= gen_rtx_REG (SImode
, reg
);
26748 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26749 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26753 addr
= plus_constant (Pmode
, addr
, delta
);
26755 /* The store needs to be marked as frame related in order to prevent
26756 DSE from deleting it as dead if it is based on fp. */
26757 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26758 RTX_FRAME_RELATED_P (insn
) = 1;
26759 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26762 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26765 /* Implements target hook vector_mode_supported_p. */
26767 arm_vector_mode_supported_p (machine_mode mode
)
26769 /* Neon also supports V2SImode, etc. listed in the clause below. */
26770 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26771 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
26772 || mode
== V2DImode
|| mode
== V8HFmode
))
26775 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26776 && ((mode
== V2SImode
)
26777 || (mode
== V4HImode
)
26778 || (mode
== V8QImode
)))
26781 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26782 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26783 || mode
== V2HAmode
))
26789 /* Implements target hook array_mode_supported_p. */
26792 arm_array_mode_supported_p (machine_mode mode
,
26793 unsigned HOST_WIDE_INT nelems
)
26796 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26797 && (nelems
>= 2 && nelems
<= 4))
26803 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26804 registers when autovectorizing for Neon, at least until multiple vector
26805 widths are supported properly by the middle-end. */
26807 static machine_mode
26808 arm_preferred_simd_mode (machine_mode mode
)
26814 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26816 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26818 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26820 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26822 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26829 if (TARGET_REALLY_IWMMXT
)
26845 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26847 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26848 using r0-r4 for function arguments, r7 for the stack frame and don't have
26849 enough left over to do doubleword arithmetic. For Thumb-2 all the
26850 potentially problematic instructions accept high registers so this is not
26851 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26852 that require many low registers. */
26854 arm_class_likely_spilled_p (reg_class_t rclass
)
26856 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
26857 || rclass
== CC_REG
)
26863 /* Implements target hook small_register_classes_for_mode_p. */
26865 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
26867 return TARGET_THUMB1
;
26870 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26871 ARM insns and therefore guarantee that the shift count is modulo 256.
26872 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26873 guarantee no particular behavior for out-of-range counts. */
26875 static unsigned HOST_WIDE_INT
26876 arm_shift_truncation_mask (machine_mode mode
)
26878 return mode
== SImode
? 255 : 0;
26882 /* Map internal gcc register numbers to DWARF2 register numbers. */
26885 arm_dbx_register_number (unsigned int regno
)
26890 if (IS_VFP_REGNUM (regno
))
26892 /* See comment in arm_dwarf_register_span. */
26893 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26894 return 64 + regno
- FIRST_VFP_REGNUM
;
26896 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
26899 if (IS_IWMMXT_GR_REGNUM (regno
))
26900 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
26902 if (IS_IWMMXT_REGNUM (regno
))
26903 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
26905 return DWARF_FRAME_REGISTERS
;
26908 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26909 GCC models tham as 64 32-bit registers, so we need to describe this to
26910 the DWARF generation code. Other registers can use the default. */
26912 arm_dwarf_register_span (rtx rtl
)
26920 regno
= REGNO (rtl
);
26921 if (!IS_VFP_REGNUM (regno
))
26924 /* XXX FIXME: The EABI defines two VFP register ranges:
26925 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26927 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26928 corresponding D register. Until GDB supports this, we shall use the
26929 legacy encodings. We also use these encodings for D0-D15 for
26930 compatibility with older debuggers. */
26931 mode
= GET_MODE (rtl
);
26932 if (GET_MODE_SIZE (mode
) < 8)
26935 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26937 nregs
= GET_MODE_SIZE (mode
) / 4;
26938 for (i
= 0; i
< nregs
; i
+= 2)
26939 if (TARGET_BIG_END
)
26941 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26942 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
26946 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
26947 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26952 nregs
= GET_MODE_SIZE (mode
) / 8;
26953 for (i
= 0; i
< nregs
; i
++)
26954 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
26957 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
26960 #if ARM_UNWIND_INFO
26961 /* Emit unwind directives for a store-multiple instruction or stack pointer
26962 push during alignment.
26963 These should only ever be generated by the function prologue code, so
26964 expect them to have a particular form.
26965 The store-multiple instruction sometimes pushes pc as the last register,
26966 although it should not be tracked into unwind information, or for -Os
26967 sometimes pushes some dummy registers before first register that needs
26968 to be tracked in unwind information; such dummy registers are there just
26969 to avoid separate stack adjustment, and will not be restored in the
26973 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
26976 HOST_WIDE_INT offset
;
26977 HOST_WIDE_INT nregs
;
26981 unsigned padfirst
= 0, padlast
= 0;
26984 e
= XVECEXP (p
, 0, 0);
26985 gcc_assert (GET_CODE (e
) == SET
);
26987 /* First insn will adjust the stack pointer. */
26988 gcc_assert (GET_CODE (e
) == SET
26989 && REG_P (SET_DEST (e
))
26990 && REGNO (SET_DEST (e
)) == SP_REGNUM
26991 && GET_CODE (SET_SRC (e
)) == PLUS
);
26993 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
26994 nregs
= XVECLEN (p
, 0) - 1;
26995 gcc_assert (nregs
);
26997 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
27000 /* For -Os dummy registers can be pushed at the beginning to
27001 avoid separate stack pointer adjustment. */
27002 e
= XVECEXP (p
, 0, 1);
27003 e
= XEXP (SET_DEST (e
), 0);
27004 if (GET_CODE (e
) == PLUS
)
27005 padfirst
= INTVAL (XEXP (e
, 1));
27006 gcc_assert (padfirst
== 0 || optimize_size
);
27007 /* The function prologue may also push pc, but not annotate it as it is
27008 never restored. We turn this into a stack pointer adjustment. */
27009 e
= XVECEXP (p
, 0, nregs
);
27010 e
= XEXP (SET_DEST (e
), 0);
27011 if (GET_CODE (e
) == PLUS
)
27012 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27014 padlast
= offset
- 4;
27015 gcc_assert (padlast
== 0 || padlast
== 4);
27017 fprintf (asm_out_file
, "\t.pad #4\n");
27019 fprintf (asm_out_file
, "\t.save {");
27021 else if (IS_VFP_REGNUM (reg
))
27024 fprintf (asm_out_file
, "\t.vsave {");
27027 /* Unknown register type. */
27028 gcc_unreachable ();
27030 /* If the stack increment doesn't match the size of the saved registers,
27031 something has gone horribly wrong. */
27032 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27036 /* The remaining insns will describe the stores. */
27037 for (i
= 1; i
<= nregs
; i
++)
27039 /* Expect (set (mem <addr>) (reg)).
27040 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27041 e
= XVECEXP (p
, 0, i
);
27042 gcc_assert (GET_CODE (e
) == SET
27043 && MEM_P (SET_DEST (e
))
27044 && REG_P (SET_SRC (e
)));
27046 reg
= REGNO (SET_SRC (e
));
27047 gcc_assert (reg
>= lastreg
);
27050 fprintf (asm_out_file
, ", ");
27051 /* We can't use %r for vfp because we need to use the
27052 double precision register names. */
27053 if (IS_VFP_REGNUM (reg
))
27054 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27056 asm_fprintf (asm_out_file
, "%r", reg
);
27060 /* Check that the addresses are consecutive. */
27061 e
= XEXP (SET_DEST (e
), 0);
27062 if (GET_CODE (e
) == PLUS
)
27063 gcc_assert (REG_P (XEXP (e
, 0))
27064 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27065 && CONST_INT_P (XEXP (e
, 1))
27066 && offset
== INTVAL (XEXP (e
, 1)));
27070 && REGNO (e
) == SP_REGNUM
);
27071 offset
+= reg_size
;
27074 fprintf (asm_out_file
, "}\n");
27076 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
27079 /* Emit unwind directives for a SET. */
27082 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27090 switch (GET_CODE (e0
))
27093 /* Pushing a single register. */
27094 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27095 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27096 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27099 asm_fprintf (asm_out_file
, "\t.save ");
27100 if (IS_VFP_REGNUM (REGNO (e1
)))
27101 asm_fprintf(asm_out_file
, "{d%d}\n",
27102 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27104 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27108 if (REGNO (e0
) == SP_REGNUM
)
27110 /* A stack increment. */
27111 if (GET_CODE (e1
) != PLUS
27112 || !REG_P (XEXP (e1
, 0))
27113 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27114 || !CONST_INT_P (XEXP (e1
, 1)))
27117 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27118 -INTVAL (XEXP (e1
, 1)));
27120 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27122 HOST_WIDE_INT offset
;
27124 if (GET_CODE (e1
) == PLUS
)
27126 if (!REG_P (XEXP (e1
, 0))
27127 || !CONST_INT_P (XEXP (e1
, 1)))
27129 reg
= REGNO (XEXP (e1
, 0));
27130 offset
= INTVAL (XEXP (e1
, 1));
27131 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27132 HARD_FRAME_POINTER_REGNUM
, reg
,
27135 else if (REG_P (e1
))
27138 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27139 HARD_FRAME_POINTER_REGNUM
, reg
);
27144 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27146 /* Move from sp to reg. */
27147 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27149 else if (GET_CODE (e1
) == PLUS
27150 && REG_P (XEXP (e1
, 0))
27151 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27152 && CONST_INT_P (XEXP (e1
, 1)))
27154 /* Set reg to offset from sp. */
27155 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27156 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27168 /* Emit unwind directives for the given insn. */
27171 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27174 bool handled_one
= false;
27176 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27179 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27180 && (TREE_NOTHROW (current_function_decl
)
27181 || crtl
->all_throwers_are_sibcalls
))
27184 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27187 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27189 switch (REG_NOTE_KIND (note
))
27191 case REG_FRAME_RELATED_EXPR
:
27192 pat
= XEXP (note
, 0);
27195 case REG_CFA_REGISTER
:
27196 pat
= XEXP (note
, 0);
27199 pat
= PATTERN (insn
);
27200 if (GET_CODE (pat
) == PARALLEL
)
27201 pat
= XVECEXP (pat
, 0, 0);
27204 /* Only emitted for IS_STACKALIGN re-alignment. */
27209 src
= SET_SRC (pat
);
27210 dest
= SET_DEST (pat
);
27212 gcc_assert (src
== stack_pointer_rtx
);
27213 reg
= REGNO (dest
);
27214 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27217 handled_one
= true;
27220 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27221 to get correct dwarf information for shrink-wrap. We should not
27222 emit unwind information for it because these are used either for
27223 pretend arguments or notes to adjust sp and restore registers from
27225 case REG_CFA_DEF_CFA
:
27226 case REG_CFA_ADJUST_CFA
:
27227 case REG_CFA_RESTORE
:
27230 case REG_CFA_EXPRESSION
:
27231 case REG_CFA_OFFSET
:
27232 /* ??? Only handling here what we actually emit. */
27233 gcc_unreachable ();
27241 pat
= PATTERN (insn
);
27244 switch (GET_CODE (pat
))
27247 arm_unwind_emit_set (asm_out_file
, pat
);
27251 /* Store multiple. */
27252 arm_unwind_emit_sequence (asm_out_file
, pat
);
27261 /* Output a reference from a function exception table to the type_info
27262 object X. The EABI specifies that the symbol should be relocated by
27263 an R_ARM_TARGET2 relocation. */
27266 arm_output_ttype (rtx x
)
27268 fputs ("\t.word\t", asm_out_file
);
27269 output_addr_const (asm_out_file
, x
);
27270 /* Use special relocations for symbol references. */
27271 if (!CONST_INT_P (x
))
27272 fputs ("(TARGET2)", asm_out_file
);
27273 fputc ('\n', asm_out_file
);
27278 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27281 arm_asm_emit_except_personality (rtx personality
)
27283 fputs ("\t.personality\t", asm_out_file
);
27284 output_addr_const (asm_out_file
, personality
);
27285 fputc ('\n', asm_out_file
);
27287 #endif /* ARM_UNWIND_INFO */
27289 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27292 arm_asm_init_sections (void)
27294 #if ARM_UNWIND_INFO
27295 exception_section
= get_unnamed_section (0, output_section_asm_op
,
27297 #endif /* ARM_UNWIND_INFO */
27299 #ifdef OBJECT_FORMAT_ELF
27300 if (target_pure_code
)
27301 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
27305 /* Output unwind directives for the start/end of a function. */
27308 arm_output_fn_unwind (FILE * f
, bool prologue
)
27310 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27314 fputs ("\t.fnstart\n", f
);
27317 /* If this function will never be unwound, then mark it as such.
27318 The came condition is used in arm_unwind_emit to suppress
27319 the frame annotations. */
27320 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27321 && (TREE_NOTHROW (current_function_decl
)
27322 || crtl
->all_throwers_are_sibcalls
))
27323 fputs("\t.cantunwind\n", f
);
27325 fputs ("\t.fnend\n", f
);
27330 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27332 enum tls_reloc reloc
;
27335 val
= XVECEXP (x
, 0, 0);
27336 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27338 output_addr_const (fp
, val
);
27343 fputs ("(tlsgd)", fp
);
27346 fputs ("(tlsldm)", fp
);
27349 fputs ("(tlsldo)", fp
);
27352 fputs ("(gottpoff)", fp
);
27355 fputs ("(tpoff)", fp
);
27358 fputs ("(tlsdesc)", fp
);
27361 gcc_unreachable ();
27370 fputs (" + (. - ", fp
);
27371 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27372 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27373 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27374 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27384 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27387 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27389 gcc_assert (size
== 4);
27390 fputs ("\t.word\t", file
);
27391 output_addr_const (file
, x
);
27392 fputs ("(tlsldo)", file
);
27395 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27398 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27400 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27401 return arm_emit_tls_decoration (fp
, x
);
27402 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27405 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27407 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27408 assemble_name_raw (fp
, label
);
27412 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27414 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27418 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27422 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27424 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27428 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27432 else if (GET_CODE (x
) == CONST_VECTOR
)
27433 return arm_emit_vector_const (fp
, x
);
27438 /* Output assembly for a shift instruction.
27439 SET_FLAGS determines how the instruction modifies the condition codes.
27440 0 - Do not set condition codes.
27441 1 - Set condition codes.
27442 2 - Use smallest instruction. */
27444 arm_output_shift(rtx
* operands
, int set_flags
)
27447 static const char flag_chars
[3] = {'?', '.', '!'};
27452 c
= flag_chars
[set_flags
];
27453 shift
= shift_op(operands
[3], &val
);
27457 operands
[2] = GEN_INT(val
);
27458 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27461 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27463 output_asm_insn (pattern
, operands
);
27467 /* Output assembly for a WMMX immediate shift instruction. */
27469 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27471 int shift
= INTVAL (operands
[2]);
27473 machine_mode opmode
= GET_MODE (operands
[0]);
27475 gcc_assert (shift
>= 0);
27477 /* If the shift value in the register versions is > 63 (for D qualifier),
27478 31 (for W qualifier) or 15 (for H qualifier). */
27479 if (((opmode
== V4HImode
) && (shift
> 15))
27480 || ((opmode
== V2SImode
) && (shift
> 31))
27481 || ((opmode
== DImode
) && (shift
> 63)))
27485 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27486 output_asm_insn (templ
, operands
);
27487 if (opmode
== DImode
)
27489 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27490 output_asm_insn (templ
, operands
);
27495 /* The destination register will contain all zeros. */
27496 sprintf (templ
, "wzero\t%%0");
27497 output_asm_insn (templ
, operands
);
27502 if ((opmode
== DImode
) && (shift
> 32))
27504 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27505 output_asm_insn (templ
, operands
);
27506 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27507 output_asm_insn (templ
, operands
);
27511 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27512 output_asm_insn (templ
, operands
);
27517 /* Output assembly for a WMMX tinsr instruction. */
27519 arm_output_iwmmxt_tinsr (rtx
*operands
)
27521 int mask
= INTVAL (operands
[3]);
27524 int units
= mode_nunits
[GET_MODE (operands
[0])];
27525 gcc_assert ((mask
& (mask
- 1)) == 0);
27526 for (i
= 0; i
< units
; ++i
)
27528 if ((mask
& 0x01) == 1)
27534 gcc_assert (i
< units
);
27536 switch (GET_MODE (operands
[0]))
27539 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27542 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27545 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27548 gcc_unreachable ();
27551 output_asm_insn (templ
, operands
);
27556 /* Output a Thumb-1 casesi dispatch sequence. */
27558 thumb1_output_casesi (rtx
*operands
)
27560 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27562 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27564 switch (GET_MODE(diff_vec
))
27567 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27568 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27570 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27571 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27573 return "bl\t%___gnu_thumb1_case_si";
27575 gcc_unreachable ();
27579 /* Output a Thumb-2 casesi instruction. */
27581 thumb2_output_casesi (rtx
*operands
)
27583 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27585 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27587 output_asm_insn ("cmp\t%0, %1", operands
);
27588 output_asm_insn ("bhi\t%l3", operands
);
27589 switch (GET_MODE(diff_vec
))
27592 return "tbb\t[%|pc, %0]";
27594 return "tbh\t[%|pc, %0, lsl #1]";
27598 output_asm_insn ("adr\t%4, %l2", operands
);
27599 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27600 output_asm_insn ("add\t%4, %4, %5", operands
);
27605 output_asm_insn ("adr\t%4, %l2", operands
);
27606 return "ldr\t%|pc, [%4, %0, lsl #2]";
27609 gcc_unreachable ();
27613 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27614 per-core tuning structs. */
27616 arm_issue_rate (void)
27618 return current_tune
->issue_rate
;
27621 /* Return how many instructions should scheduler lookahead to choose the
27624 arm_first_cycle_multipass_dfa_lookahead (void)
27626 int issue_rate
= arm_issue_rate ();
27628 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27631 /* Enable modeling of L2 auto-prefetcher. */
27633 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27635 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27639 arm_mangle_type (const_tree type
)
27641 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27642 has to be managled as if it is in the "std" namespace. */
27643 if (TARGET_AAPCS_BASED
27644 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27645 return "St9__va_list";
27647 /* Half-precision float. */
27648 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27651 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27653 if (TYPE_NAME (type
) != NULL
)
27654 return arm_mangle_builtin_type (type
);
27656 /* Use the default mangling. */
27660 /* Order of allocation of core registers for Thumb: this allocation is
27661 written over the corresponding initial entries of the array
27662 initialized with REG_ALLOC_ORDER. We allocate all low registers
27663 first. Saving and restoring a low register is usually cheaper than
27664 using a call-clobbered high register. */
27666 static const int thumb_core_reg_alloc_order
[] =
27668 3, 2, 1, 0, 4, 5, 6, 7,
27669 12, 14, 8, 9, 10, 11
27672 /* Adjust register allocation order when compiling for Thumb. */
27675 arm_order_regs_for_local_alloc (void)
27677 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27678 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27680 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27681 sizeof (thumb_core_reg_alloc_order
));
27684 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27687 arm_frame_pointer_required (void)
27689 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27692 /* If the function receives nonlocal gotos, it needs to save the frame
27693 pointer in the nonlocal_goto_save_area object. */
27694 if (cfun
->has_nonlocal_label
)
27697 /* The frame pointer is required for non-leaf APCS frames. */
27698 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
27701 /* If we are probing the stack in the prologue, we will have a faulting
27702 instruction prior to the stack adjustment and this requires a frame
27703 pointer if we want to catch the exception using the EABI unwinder. */
27704 if (!IS_INTERRUPT (arm_current_func_type ())
27705 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27706 && arm_except_unwind_info (&global_options
) == UI_TARGET
27707 && cfun
->can_throw_non_call_exceptions
)
27709 HOST_WIDE_INT size
= get_frame_size ();
27711 /* That's irrelevant if there is no stack adjustment. */
27715 /* That's relevant only if there is a stack probe. */
27716 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27718 /* We don't have the final size of the frame so adjust. */
27719 size
+= 32 * UNITS_PER_WORD
;
27720 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
27730 /* Only thumb1 can't support conditional execution, so return true if
27731 the target is not thumb1. */
27733 arm_have_conditional_execution (void)
27735 return !TARGET_THUMB1
;
27738 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27739 static HOST_WIDE_INT
27740 arm_vector_alignment (const_tree type
)
27742 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27744 if (TARGET_AAPCS_BASED
)
27745 align
= MIN (align
, 64);
27750 static unsigned int
27751 arm_autovectorize_vector_sizes (void)
27753 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27757 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27759 /* Vectors which aren't in packed structures will not be less aligned than
27760 the natural alignment of their element type, so this is safe. */
27761 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27764 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27768 arm_builtin_support_vector_misalignment (machine_mode mode
,
27769 const_tree type
, int misalignment
,
27772 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27774 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27779 /* If the misalignment is unknown, we should be able to handle the access
27780 so long as it is not to a member of a packed data structure. */
27781 if (misalignment
== -1)
27784 /* Return true if the misalignment is a multiple of the natural alignment
27785 of the vector's element type. This is probably always going to be
27786 true in practice, since we've already established that this isn't a
27788 return ((misalignment
% align
) == 0);
27791 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27796 arm_conditional_register_usage (void)
27800 if (TARGET_THUMB1
&& optimize_size
)
27802 /* When optimizing for size on Thumb-1, it's better not
27803 to use the HI regs, because of the overhead of
27805 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27806 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27809 /* The link register can be clobbered by any branch insn,
27810 but we have no way to track that at present, so mark
27811 it as unavailable. */
27813 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27815 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
27817 /* VFPv3 registers are disabled when earlier VFP
27818 versions are selected due to the definition of
27819 LAST_VFP_REGNUM. */
27820 for (regno
= FIRST_VFP_REGNUM
;
27821 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27823 fixed_regs
[regno
] = 0;
27824 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27825 || regno
>= FIRST_VFP_REGNUM
+ 32;
27829 if (TARGET_REALLY_IWMMXT
)
27831 regno
= FIRST_IWMMXT_GR_REGNUM
;
27832 /* The 2002/10/09 revision of the XScale ABI has wCG0
27833 and wCG1 as call-preserved registers. The 2002/11/21
27834 revision changed this so that all wCG registers are
27835 scratch registers. */
27836 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27837 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27838 fixed_regs
[regno
] = 0;
27839 /* The XScale ABI has wR0 - wR9 as scratch registers,
27840 the rest as call-preserved registers. */
27841 for (regno
= FIRST_IWMMXT_REGNUM
;
27842 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27844 fixed_regs
[regno
] = 0;
27845 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27849 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27851 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27852 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27854 else if (TARGET_APCS_STACK
)
27856 fixed_regs
[10] = 1;
27857 call_used_regs
[10] = 1;
27859 /* -mcaller-super-interworking reserves r11 for calls to
27860 _interwork_r11_call_via_rN(). Making the register global
27861 is an easy way of ensuring that it remains valid for all
27863 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27864 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27866 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27867 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27868 if (TARGET_CALLER_INTERWORKING
)
27869 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27871 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27875 arm_preferred_rename_class (reg_class_t rclass
)
27877 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27878 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27879 and code size can be reduced. */
27880 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27886 /* Compute the attribute "length" of insn "*push_multi".
27887 So this function MUST be kept in sync with that insn pattern. */
27889 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
27891 int i
, regno
, hi_reg
;
27892 int num_saves
= XVECLEN (parallel_op
, 0);
27902 regno
= REGNO (first_op
);
27903 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
27904 list is 8-bit. Normally this means all registers in the list must be
27905 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
27906 encodings. There is one exception for PUSH that LR in HI_REGS can be used
27907 with 16-bit encoding. */
27908 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27909 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
27911 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
27912 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27920 /* Compute the attribute "length" of insn. Currently, this function is used
27921 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27922 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27923 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
27924 true if OPERANDS contains insn which explicit updates base register. */
27927 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
27936 rtx parallel_op
= operands
[0];
27937 /* Initialize to elements number of PARALLEL. */
27938 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
27939 /* Initialize the value to base register. */
27940 unsigned regno
= REGNO (operands
[1]);
27941 /* Skip return and write back pattern.
27942 We only need register pop pattern for later analysis. */
27943 unsigned first_indx
= 0;
27944 first_indx
+= return_pc
? 1 : 0;
27945 first_indx
+= write_back_p
? 1 : 0;
27947 /* A pop operation can be done through LDM or POP. If the base register is SP
27948 and if it's with write back, then a LDM will be alias of POP. */
27949 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
27950 bool ldm_p
= !pop_p
;
27952 /* Check base register for LDM. */
27953 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
27956 /* Check each register in the list. */
27957 for (; indx
>= first_indx
; indx
--)
27959 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
27960 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27961 comment in arm_attr_length_push_multi. */
27962 if (REGNO_REG_CLASS (regno
) == HI_REGS
27963 && (regno
!= PC_REGNUM
|| ldm_p
))
27970 /* Compute the number of instructions emitted by output_move_double. */
27972 arm_count_output_move_double_insns (rtx
*operands
)
27976 /* output_move_double may modify the operands array, so call it
27977 here on a copy of the array. */
27978 ops
[0] = operands
[0];
27979 ops
[1] = operands
[1];
27980 output_move_double (ops
, false, &count
);
27985 vfp3_const_double_for_fract_bits (rtx operand
)
27987 REAL_VALUE_TYPE r0
;
27989 if (!CONST_DOUBLE_P (operand
))
27992 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
27993 if (exact_real_inverse (DFmode
, &r0
)
27994 && !REAL_VALUE_NEGATIVE (r0
))
27996 if (exact_real_truncate (DFmode
, &r0
))
27998 HOST_WIDE_INT value
= real_to_integer (&r0
);
27999 value
= value
& 0xffffffff;
28000 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28002 int ret
= exact_log2 (value
);
28003 gcc_assert (IN_RANGE (ret
, 0, 31));
28011 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28012 log2 is in [1, 32], return that log2. Otherwise return -1.
28013 This is used in the patterns for vcvt.s32.f32 floating-point to
28014 fixed-point conversions. */
28017 vfp3_const_double_for_bits (rtx x
)
28019 const REAL_VALUE_TYPE
*r
;
28021 if (!CONST_DOUBLE_P (x
))
28024 r
= CONST_DOUBLE_REAL_VALUE (x
);
28026 if (REAL_VALUE_NEGATIVE (*r
)
28027 || REAL_VALUE_ISNAN (*r
)
28028 || REAL_VALUE_ISINF (*r
)
28029 || !real_isinteger (r
, SFmode
))
28032 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
28034 /* The exact_log2 above will have returned -1 if this is
28035 not an exact log2. */
28036 if (!IN_RANGE (hwint
, 1, 32))
28043 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28046 arm_pre_atomic_barrier (enum memmodel model
)
28048 if (need_atomic_barrier_p (model
, true))
28049 emit_insn (gen_memory_barrier ());
28053 arm_post_atomic_barrier (enum memmodel model
)
28055 if (need_atomic_barrier_p (model
, false))
28056 emit_insn (gen_memory_barrier ());
28059 /* Emit the load-exclusive and store-exclusive instructions.
28060 Use acquire and release versions if necessary. */
28063 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28065 rtx (*gen
) (rtx
, rtx
);
28071 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28072 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28073 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28074 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28076 gcc_unreachable ();
28083 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28084 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
28085 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
28086 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
28088 gcc_unreachable ();
28092 emit_insn (gen (rval
, mem
));
28096 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28099 rtx (*gen
) (rtx
, rtx
, rtx
);
28105 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28106 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28107 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28108 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28110 gcc_unreachable ();
28117 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28118 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
28119 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
28120 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
28122 gcc_unreachable ();
28126 emit_insn (gen (bval
, rval
, mem
));
28129 /* Mark the previous jump instruction as unlikely. */
28132 emit_unlikely_jump (rtx insn
)
28134 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
28136 rtx_insn
*jump
= emit_jump_insn (insn
);
28137 add_int_reg_note (jump
, REG_BR_PROB
, very_unlikely
);
28140 /* Expand a compare and swap pattern. */
28143 arm_expand_compare_and_swap (rtx operands
[])
28145 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28147 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28149 bval
= operands
[0];
28150 rval
= operands
[1];
28152 oldval
= operands
[3];
28153 newval
= operands
[4];
28154 is_weak
= operands
[5];
28155 mod_s
= operands
[6];
28156 mod_f
= operands
[7];
28157 mode
= GET_MODE (mem
);
28159 /* Normally the succ memory model must be stronger than fail, but in the
28160 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28161 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28163 if (TARGET_HAVE_LDACQ
28164 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28165 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28166 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28172 /* For narrow modes, we're going to perform the comparison in SImode,
28173 so do the zero-extension now. */
28174 rval
= gen_reg_rtx (SImode
);
28175 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28179 /* Force the value into a register if needed. We waited until after
28180 the zero-extension above to do this properly. */
28181 if (!arm_add_operand (oldval
, SImode
))
28182 oldval
= force_reg (SImode
, oldval
);
28186 if (!cmpdi_operand (oldval
, mode
))
28187 oldval
= force_reg (mode
, oldval
);
28191 gcc_unreachable ();
28196 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
28197 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
28198 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
28199 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
28201 gcc_unreachable ();
28204 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CCmode
, CC_REGNUM
);
28205 emit_insn (gen (bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28207 if (mode
== QImode
|| mode
== HImode
)
28208 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28210 /* In all cases, we arrange for success to be signaled by Z set.
28211 This arrangement allows for the boolean result to be used directly
28212 in a subsequent branch, post optimization. For Thumb-1 targets, the
28213 boolean negation of the result is also stored in bval because Thumb-1
28214 backend lacks dependency tracking for CC flag due to flag-setting not
28215 being represented at RTL level. */
28217 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
28220 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
28221 emit_insn (gen_rtx_SET (bval
, x
));
28225 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28226 another memory store between the load-exclusive and store-exclusive can
28227 reset the monitor from Exclusive to Open state. This means we must wait
28228 until after reload to split the pattern, lest we get a register spill in
28229 the middle of the atomic sequence. Success of the compare and swap is
28230 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28231 for Thumb-1 targets (ie. negation of the boolean value returned by
28232 atomic_compare_and_swapmode standard pattern in operand 0). */
28235 arm_split_compare_and_swap (rtx operands
[])
28237 rtx rval
, mem
, oldval
, newval
, neg_bval
;
28239 enum memmodel mod_s
, mod_f
;
28241 rtx_code_label
*label1
, *label2
;
28244 rval
= operands
[1];
28246 oldval
= operands
[3];
28247 newval
= operands
[4];
28248 is_weak
= (operands
[5] != const0_rtx
);
28249 mod_s
= memmodel_from_int (INTVAL (operands
[6]));
28250 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
28251 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
28252 mode
= GET_MODE (mem
);
28254 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28256 bool use_acquire
= TARGET_HAVE_LDACQ
28257 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28258 || is_mm_release (mod_s
));
28260 bool use_release
= TARGET_HAVE_LDACQ
28261 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28262 || is_mm_acquire (mod_s
));
28264 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28265 a full barrier is emitted after the store-release. */
28267 use_acquire
= false;
28269 /* Checks whether a barrier is needed and emits one accordingly. */
28270 if (!(use_acquire
|| use_release
))
28271 arm_pre_atomic_barrier (mod_s
);
28276 label1
= gen_label_rtx ();
28277 emit_label (label1
);
28279 label2
= gen_label_rtx ();
28281 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28283 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28284 as required to communicate with arm_expand_compare_and_swap. */
28287 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
28288 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28289 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28290 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28291 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28295 emit_move_insn (neg_bval
, const1_rtx
);
28296 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
28297 if (thumb1_cmpneg_operand (oldval
, SImode
))
28298 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
28301 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
28304 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
28306 /* Weak or strong, we want EQ to be true for success, so that we
28307 match the flags that we got from the compare above. */
28310 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28311 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
28312 emit_insn (gen_rtx_SET (cond
, x
));
28317 /* Z is set to boolean value of !neg_bval, as required to communicate
28318 with arm_expand_compare_and_swap. */
28319 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
28320 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
28323 if (!is_mm_relaxed (mod_f
))
28324 emit_label (label2
);
28326 /* Checks whether a barrier is needed and emits one accordingly. */
28328 || !(use_acquire
|| use_release
))
28329 arm_post_atomic_barrier (mod_s
);
28331 if (is_mm_relaxed (mod_f
))
28332 emit_label (label2
);
28335 /* Split an atomic operation pattern. Operation is given by CODE and is one
28336 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28337 operation). Operation is performed on the content at MEM and on VALUE
28338 following the memory model MODEL_RTX. The content at MEM before and after
28339 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28340 success of the operation is returned in COND. Using a scratch register or
28341 an operand register for these determines what result is returned for that
28345 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28346 rtx value
, rtx model_rtx
, rtx cond
)
28348 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28349 machine_mode mode
= GET_MODE (mem
);
28350 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28351 rtx_code_label
*label
;
28352 bool all_low_regs
, bind_old_new
;
28355 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28357 bool use_acquire
= TARGET_HAVE_LDACQ
28358 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28359 || is_mm_release (model
));
28361 bool use_release
= TARGET_HAVE_LDACQ
28362 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28363 || is_mm_acquire (model
));
28365 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28366 a full barrier is emitted after the store-release. */
28368 use_acquire
= false;
28370 /* Checks whether a barrier is needed and emits one accordingly. */
28371 if (!(use_acquire
|| use_release
))
28372 arm_pre_atomic_barrier (model
);
28374 label
= gen_label_rtx ();
28375 emit_label (label
);
28378 new_out
= gen_lowpart (wmode
, new_out
);
28380 old_out
= gen_lowpart (wmode
, old_out
);
28383 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28385 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28387 /* Does the operation require destination and first operand to use the same
28388 register? This is decided by register constraints of relevant insn
28389 patterns in thumb1.md. */
28390 gcc_assert (!new_out
|| REG_P (new_out
));
28391 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
28392 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
28393 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
28398 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
28400 /* We want to return the old value while putting the result of the operation
28401 in the same register as the old value so copy the old value over to the
28402 destination register and use that register for the operation. */
28403 if (old_out
&& bind_old_new
)
28405 emit_move_insn (new_out
, old_out
);
28416 x
= gen_rtx_AND (wmode
, old_out
, value
);
28417 emit_insn (gen_rtx_SET (new_out
, x
));
28418 x
= gen_rtx_NOT (wmode
, new_out
);
28419 emit_insn (gen_rtx_SET (new_out
, x
));
28423 if (CONST_INT_P (value
))
28425 value
= GEN_INT (-INTVAL (value
));
28431 if (mode
== DImode
)
28433 /* DImode plus/minus need to clobber flags. */
28434 /* The adddi3 and subdi3 patterns are incorrectly written so that
28435 they require matching operands, even when we could easily support
28436 three operands. Thankfully, this can be fixed up post-splitting,
28437 as the individual add+adc patterns do accept three operands and
28438 post-reload cprop can make these moves go away. */
28439 emit_move_insn (new_out
, old_out
);
28441 x
= gen_adddi3 (new_out
, new_out
, value
);
28443 x
= gen_subdi3 (new_out
, new_out
, value
);
28450 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28451 emit_insn (gen_rtx_SET (new_out
, x
));
28455 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28458 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28459 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28461 /* Checks whether a barrier is needed and emits one accordingly. */
28463 || !(use_acquire
|| use_release
))
28464 arm_post_atomic_barrier (model
);
28467 #define MAX_VECT_LEN 16
28469 struct expand_vec_perm_d
28471 rtx target
, op0
, op1
;
28472 unsigned char perm
[MAX_VECT_LEN
];
28473 machine_mode vmode
;
28474 unsigned char nelt
;
28479 /* Generate a variable permutation. */
28482 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28484 machine_mode vmode
= GET_MODE (target
);
28485 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28487 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28488 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28489 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28490 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28491 gcc_checking_assert (TARGET_NEON
);
28495 if (vmode
== V8QImode
)
28496 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28498 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28504 if (vmode
== V8QImode
)
28506 pair
= gen_reg_rtx (V16QImode
);
28507 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28508 pair
= gen_lowpart (TImode
, pair
);
28509 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28513 pair
= gen_reg_rtx (OImode
);
28514 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28515 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28521 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28523 machine_mode vmode
= GET_MODE (target
);
28524 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
28525 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28526 rtx rmask
[MAX_VECT_LEN
], mask
;
28528 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28529 numbering of elements for big-endian, we must reverse the order. */
28530 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28532 /* The VTBL instruction does not use a modulo index, so we must take care
28533 of that ourselves. */
28534 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28535 for (i
= 0; i
< nelt
; ++i
)
28537 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
28538 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28540 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28543 /* Map lane ordering between architectural lane order, and GCC lane order,
28544 taking into account ABI. See comment above output_move_neon for details. */
28547 neon_endian_lane_map (machine_mode mode
, int lane
)
28549 if (BYTES_BIG_ENDIAN
)
28551 int nelems
= GET_MODE_NUNITS (mode
);
28552 /* Reverse lane order. */
28553 lane
= (nelems
- 1 - lane
);
28554 /* Reverse D register order, to match ABI. */
28555 if (GET_MODE_SIZE (mode
) == 16)
28556 lane
= lane
^ (nelems
/ 2);
28561 /* Some permutations index into pairs of vectors, this is a helper function
28562 to map indexes into those pairs of vectors. */
28565 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
28567 int nelem
= GET_MODE_NUNITS (mode
);
28568 if (BYTES_BIG_ENDIAN
)
28570 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
28574 /* Generate or test for an insn that supports a constant permutation. */
28576 /* Recognize patterns for the VUZP insns. */
28579 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28581 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28582 rtx out0
, out1
, in0
, in1
;
28583 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28587 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28590 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28591 big endian pattern on 64 bit vectors, so we correct for that. */
28592 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28593 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
28595 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28597 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28599 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28603 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28605 for (i
= 0; i
< nelt
; i
++)
28608 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28609 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
28619 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28620 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28621 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28622 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28623 case V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
28624 case V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
28625 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28626 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28627 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28628 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28630 gcc_unreachable ();
28635 if (swap_nelt
!= 0)
28636 std::swap (in0
, in1
);
28639 out1
= gen_reg_rtx (d
->vmode
);
28641 std::swap (out0
, out1
);
28643 emit_insn (gen (out0
, in0
, in1
, out1
));
28647 /* Recognize patterns for the VZIP insns. */
28650 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28652 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
28653 rtx out0
, out1
, in0
, in1
;
28654 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28658 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28661 is_swapped
= BYTES_BIG_ENDIAN
;
28663 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
28666 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
28668 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28672 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28674 for (i
= 0; i
< nelt
/ 2; i
++)
28677 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
28678 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
28682 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
28683 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
28694 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28695 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28696 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28697 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28698 case V8HFmode
: gen
= gen_neon_vzipv8hf_internal
; break;
28699 case V4HFmode
: gen
= gen_neon_vzipv4hf_internal
; break;
28700 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28701 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28702 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28703 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28705 gcc_unreachable ();
28711 std::swap (in0
, in1
);
28714 out1
= gen_reg_rtx (d
->vmode
);
28716 std::swap (out0
, out1
);
28718 emit_insn (gen (out0
, in0
, in1
, out1
));
28722 /* Recognize patterns for the VREV insns. */
28725 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
28727 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
28728 rtx (*gen
)(rtx
, rtx
);
28730 if (!d
->one_vector_p
)
28739 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28740 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28748 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28749 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28750 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28751 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28752 case V8HFmode
: gen
= gen_neon_vrev64v8hf
; break;
28753 case V4HFmode
: gen
= gen_neon_vrev64v4hf
; break;
28761 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
28762 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
28763 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
28764 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
28765 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
28766 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
28767 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
28768 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
28777 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
28778 for (j
= 0; j
<= diff
; j
+= 1)
28780 /* This is guaranteed to be true as the value of diff
28781 is 7, 3, 1 and we should have enough elements in the
28782 queue to generate this. Getting a vector mask with a
28783 value of diff other than these values implies that
28784 something is wrong by the time we get here. */
28785 gcc_assert (i
+ j
< nelt
);
28786 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
28794 emit_insn (gen (d
->target
, d
->op0
));
28798 /* Recognize patterns for the VTRN insns. */
28801 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
28803 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28804 rtx out0
, out1
, in0
, in1
;
28805 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28807 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28810 /* Note that these are little-endian tests. Adjust for big-endian later. */
28811 if (d
->perm
[0] == 0)
28813 else if (d
->perm
[0] == 1)
28817 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28819 for (i
= 0; i
< nelt
; i
+= 2)
28821 if (d
->perm
[i
] != i
+ odd
)
28823 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28833 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28834 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28835 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28836 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28837 case V8HFmode
: gen
= gen_neon_vtrnv8hf_internal
; break;
28838 case V4HFmode
: gen
= gen_neon_vtrnv4hf_internal
; break;
28839 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
28840 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
28841 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
28842 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
28844 gcc_unreachable ();
28849 if (BYTES_BIG_ENDIAN
)
28851 std::swap (in0
, in1
);
28856 out1
= gen_reg_rtx (d
->vmode
);
28858 std::swap (out0
, out1
);
28860 emit_insn (gen (out0
, in0
, in1
, out1
));
28864 /* Recognize patterns for the VEXT insns. */
28867 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
28869 unsigned int i
, nelt
= d
->nelt
;
28870 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
28873 unsigned int location
;
28875 unsigned int next
= d
->perm
[0] + 1;
28877 /* TODO: Handle GCC's numbering of elements for big-endian. */
28878 if (BYTES_BIG_ENDIAN
)
28881 /* Check if the extracted indexes are increasing by one. */
28882 for (i
= 1; i
< nelt
; next
++, i
++)
28884 /* If we hit the most significant element of the 2nd vector in
28885 the previous iteration, no need to test further. */
28886 if (next
== 2 * nelt
)
28889 /* If we are operating on only one vector: it could be a
28890 rotation. If there are only two elements of size < 64, let
28891 arm_evpc_neon_vrev catch it. */
28892 if (d
->one_vector_p
&& (next
== nelt
))
28894 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
28900 if (d
->perm
[i
] != next
)
28904 location
= d
->perm
[0];
28908 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
28909 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
28910 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
28911 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
28912 case V2SImode
: gen
= gen_neon_vextv2si
; break;
28913 case V4SImode
: gen
= gen_neon_vextv4si
; break;
28914 case V4HFmode
: gen
= gen_neon_vextv4hf
; break;
28915 case V8HFmode
: gen
= gen_neon_vextv8hf
; break;
28916 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
28917 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
28918 case V2DImode
: gen
= gen_neon_vextv2di
; break;
28927 offset
= GEN_INT (location
);
28928 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
28932 /* The NEON VTBL instruction is a fully variable permuation that's even
28933 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28934 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28935 can do slightly better by expanding this as a constant where we don't
28936 have to apply a mask. */
28939 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
28941 rtx rperm
[MAX_VECT_LEN
], sel
;
28942 machine_mode vmode
= d
->vmode
;
28943 unsigned int i
, nelt
= d
->nelt
;
28945 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28946 numbering of elements for big-endian, we must reverse the order. */
28947 if (BYTES_BIG_ENDIAN
)
28953 /* Generic code will try constant permutation twice. Once with the
28954 original mode and again with the elements lowered to QImode.
28955 So wait and don't do the selector expansion ourselves. */
28956 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
28959 for (i
= 0; i
< nelt
; ++i
)
28960 rperm
[i
] = GEN_INT (d
->perm
[i
]);
28961 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
28962 sel
= force_reg (vmode
, sel
);
28964 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
28969 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
28971 /* Check if the input mask matches vext before reordering the
28974 if (arm_evpc_neon_vext (d
))
28977 /* The pattern matching functions above are written to look for a small
28978 number to begin the sequence (0, 1, N/2). If we begin with an index
28979 from the second operand, we can swap the operands. */
28980 if (d
->perm
[0] >= d
->nelt
)
28982 unsigned i
, nelt
= d
->nelt
;
28984 for (i
= 0; i
< nelt
; ++i
)
28985 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
28987 std::swap (d
->op0
, d
->op1
);
28992 if (arm_evpc_neon_vuzp (d
))
28994 if (arm_evpc_neon_vzip (d
))
28996 if (arm_evpc_neon_vrev (d
))
28998 if (arm_evpc_neon_vtrn (d
))
29000 return arm_evpc_neon_vtbl (d
);
29005 /* Expand a vec_perm_const pattern. */
29008 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29010 struct expand_vec_perm_d d
;
29011 int i
, nelt
, which
;
29017 d
.vmode
= GET_MODE (target
);
29018 gcc_assert (VECTOR_MODE_P (d
.vmode
));
29019 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29020 d
.testing_p
= false;
29022 for (i
= which
= 0; i
< nelt
; ++i
)
29024 rtx e
= XVECEXP (sel
, 0, i
);
29025 int ei
= INTVAL (e
) & (2 * nelt
- 1);
29026 which
|= (ei
< nelt
? 1 : 2);
29036 d
.one_vector_p
= false;
29037 if (!rtx_equal_p (op0
, op1
))
29040 /* The elements of PERM do not suggest that only the first operand
29041 is used, but both operands are identical. Allow easier matching
29042 of the permutation by folding the permutation into the single
29046 for (i
= 0; i
< nelt
; ++i
)
29047 d
.perm
[i
] &= nelt
- 1;
29049 d
.one_vector_p
= true;
29054 d
.one_vector_p
= true;
29058 return arm_expand_vec_perm_const_1 (&d
);
29061 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29064 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
29065 const unsigned char *sel
)
29067 struct expand_vec_perm_d d
;
29068 unsigned int i
, nelt
, which
;
29072 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29073 d
.testing_p
= true;
29074 memcpy (d
.perm
, sel
, nelt
);
29076 /* Categorize the set of elements in the selector. */
29077 for (i
= which
= 0; i
< nelt
; ++i
)
29079 unsigned char e
= d
.perm
[i
];
29080 gcc_assert (e
< 2 * nelt
);
29081 which
|= (e
< nelt
? 1 : 2);
29084 /* For all elements from second vector, fold the elements to first. */
29086 for (i
= 0; i
< nelt
; ++i
)
29089 /* Check whether the mask can be applied to the vector type. */
29090 d
.one_vector_p
= (which
!= 3);
29092 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
29093 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
29094 if (!d
.one_vector_p
)
29095 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
29098 ret
= arm_expand_vec_perm_const_1 (&d
);
29105 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
29107 /* If we are soft float and we do not have ldrd
29108 then all auto increment forms are ok. */
29109 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
29114 /* Post increment and Pre Decrement are supported for all
29115 instruction forms except for vector forms. */
29118 if (VECTOR_MODE_P (mode
))
29120 if (code
!= ARM_PRE_DEC
)
29130 /* Without LDRD and mode size greater than
29131 word size, there is no point in auto-incrementing
29132 because ldm and stm will not have these forms. */
29133 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29136 /* Vector and floating point modes do not support
29137 these auto increment forms. */
29138 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29151 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29152 on ARM, since we know that shifts by negative amounts are no-ops.
29153 Additionally, the default expansion code is not available or suitable
29154 for post-reload insn splits (this can occur when the register allocator
29155 chooses not to do a shift in NEON).
29157 This function is used in both initial expand and post-reload splits, and
29158 handles all kinds of 64-bit shifts.
29160 Input requirements:
29161 - It is safe for the input and output to be the same register, but
29162 early-clobber rules apply for the shift amount and scratch registers.
29163 - Shift by register requires both scratch registers. In all other cases
29164 the scratch registers may be NULL.
29165 - Ashiftrt by a register also clobbers the CC register. */
29167 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29168 rtx amount
, rtx scratch1
, rtx scratch2
)
29170 rtx out_high
= gen_highpart (SImode
, out
);
29171 rtx out_low
= gen_lowpart (SImode
, out
);
29172 rtx in_high
= gen_highpart (SImode
, in
);
29173 rtx in_low
= gen_lowpart (SImode
, in
);
29176 in = the register pair containing the input value.
29177 out = the destination register pair.
29178 up = the high- or low-part of each pair.
29179 down = the opposite part to "up".
29180 In a shift, we can consider bits to shift from "up"-stream to
29181 "down"-stream, so in a left-shift "up" is the low-part and "down"
29182 is the high-part of each register pair. */
29184 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29185 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29186 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29187 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29189 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29191 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29192 && GET_MODE (out
) == DImode
);
29194 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29195 && GET_MODE (in
) == DImode
);
29197 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29198 && GET_MODE (amount
) == SImode
)
29199 || CONST_INT_P (amount
)));
29200 gcc_assert (scratch1
== NULL
29201 || (GET_CODE (scratch1
) == SCRATCH
)
29202 || (GET_MODE (scratch1
) == SImode
29203 && REG_P (scratch1
)));
29204 gcc_assert (scratch2
== NULL
29205 || (GET_CODE (scratch2
) == SCRATCH
)
29206 || (GET_MODE (scratch2
) == SImode
29207 && REG_P (scratch2
)));
29208 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29209 || !HARD_REGISTER_P (out
)
29210 || (REGNO (out
) != REGNO (amount
)
29211 && REGNO (out
) + 1 != REGNO (amount
)));
29213 /* Macros to make following code more readable. */
29214 #define SUB_32(DEST,SRC) \
29215 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29216 #define RSB_32(DEST,SRC) \
29217 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29218 #define SUB_S_32(DEST,SRC) \
29219 gen_addsi3_compare0 ((DEST), (SRC), \
29221 #define SET(DEST,SRC) \
29222 gen_rtx_SET ((DEST), (SRC))
29223 #define SHIFT(CODE,SRC,AMOUNT) \
29224 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29225 #define LSHIFT(CODE,SRC,AMOUNT) \
29226 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29227 SImode, (SRC), (AMOUNT))
29228 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29229 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29230 SImode, (SRC), (AMOUNT))
29232 gen_rtx_IOR (SImode, (A), (B))
29233 #define BRANCH(COND,LABEL) \
29234 gen_arm_cond_branch ((LABEL), \
29235 gen_rtx_ ## COND (CCmode, cc_reg, \
29239 /* Shifts by register and shifts by constant are handled separately. */
29240 if (CONST_INT_P (amount
))
29242 /* We have a shift-by-constant. */
29244 /* First, handle out-of-range shift amounts.
29245 In both cases we try to match the result an ARM instruction in a
29246 shift-by-register would give. This helps reduce execution
29247 differences between optimization levels, but it won't stop other
29248 parts of the compiler doing different things. This is "undefined
29249 behavior, in any case. */
29250 if (INTVAL (amount
) <= 0)
29251 emit_insn (gen_movdi (out
, in
));
29252 else if (INTVAL (amount
) >= 64)
29254 if (code
== ASHIFTRT
)
29256 rtx const31_rtx
= GEN_INT (31);
29257 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29258 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29261 emit_insn (gen_movdi (out
, const0_rtx
));
29264 /* Now handle valid shifts. */
29265 else if (INTVAL (amount
) < 32)
29267 /* Shifts by a constant less than 32. */
29268 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29270 /* Clearing the out register in DImode first avoids lots
29271 of spilling and results in less stack usage.
29272 Later this redundant insn is completely removed.
29273 Do that only if "in" and "out" are different registers. */
29274 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29275 emit_insn (SET (out
, const0_rtx
));
29276 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29277 emit_insn (SET (out_down
,
29278 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29280 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29284 /* Shifts by a constant greater than 31. */
29285 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29287 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29288 emit_insn (SET (out
, const0_rtx
));
29289 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29290 if (code
== ASHIFTRT
)
29291 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29294 emit_insn (SET (out_up
, const0_rtx
));
29299 /* We have a shift-by-register. */
29300 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29302 /* This alternative requires the scratch registers. */
29303 gcc_assert (scratch1
&& REG_P (scratch1
));
29304 gcc_assert (scratch2
&& REG_P (scratch2
));
29306 /* We will need the values "amount-32" and "32-amount" later.
29307 Swapping them around now allows the later code to be more general. */
29311 emit_insn (SUB_32 (scratch1
, amount
));
29312 emit_insn (RSB_32 (scratch2
, amount
));
29315 emit_insn (RSB_32 (scratch1
, amount
));
29316 /* Also set CC = amount > 32. */
29317 emit_insn (SUB_S_32 (scratch2
, amount
));
29320 emit_insn (RSB_32 (scratch1
, amount
));
29321 emit_insn (SUB_32 (scratch2
, amount
));
29324 gcc_unreachable ();
29327 /* Emit code like this:
29330 out_down = in_down << amount;
29331 out_down = (in_up << (amount - 32)) | out_down;
29332 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29333 out_up = in_up << amount;
29336 out_down = in_down >> amount;
29337 out_down = (in_up << (32 - amount)) | out_down;
29339 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29340 out_up = in_up << amount;
29343 out_down = in_down >> amount;
29344 out_down = (in_up << (32 - amount)) | out_down;
29346 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29347 out_up = in_up << amount;
29349 The ARM and Thumb2 variants are the same but implemented slightly
29350 differently. If this were only called during expand we could just
29351 use the Thumb2 case and let combine do the right thing, but this
29352 can also be called from post-reload splitters. */
29354 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29356 if (!TARGET_THUMB2
)
29358 /* Emit code for ARM mode. */
29359 emit_insn (SET (out_down
,
29360 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29361 if (code
== ASHIFTRT
)
29363 rtx_code_label
*done_label
= gen_label_rtx ();
29364 emit_jump_insn (BRANCH (LT
, done_label
));
29365 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29367 emit_label (done_label
);
29370 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29375 /* Emit code for Thumb2 mode.
29376 Thumb2 can't do shift and or in one insn. */
29377 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29378 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29380 if (code
== ASHIFTRT
)
29382 rtx_code_label
*done_label
= gen_label_rtx ();
29383 emit_jump_insn (BRANCH (LT
, done_label
));
29384 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29385 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29386 emit_label (done_label
);
29390 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29391 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29395 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29409 /* Returns true if the pattern is a valid symbolic address, which is either a
29410 symbol_ref or (symbol_ref + addend).
29412 According to the ARM ELF ABI, the initial addend of REL-type relocations
29413 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29414 literal field of the instruction as a 16-bit signed value in the range
29415 -32768 <= A < 32768. */
29418 arm_valid_symbolic_address_p (rtx addr
)
29420 rtx xop0
, xop1
= NULL_RTX
;
29423 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29426 /* (const (plus: symbol_ref const_int)) */
29427 if (GET_CODE (addr
) == CONST
)
29428 tmp
= XEXP (addr
, 0);
29430 if (GET_CODE (tmp
) == PLUS
)
29432 xop0
= XEXP (tmp
, 0);
29433 xop1
= XEXP (tmp
, 1);
29435 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29436 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29442 /* Returns true if a valid comparison operation and makes
29443 the operands in a form that is valid. */
29445 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29447 enum rtx_code code
= GET_CODE (*comparison
);
29449 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29450 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29452 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29454 if (code
== UNEQ
|| code
== LTGT
)
29457 code_int
= (int)code
;
29458 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29459 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29464 if (!arm_add_operand (*op1
, mode
))
29465 *op1
= force_reg (mode
, *op1
);
29466 if (!arm_add_operand (*op2
, mode
))
29467 *op2
= force_reg (mode
, *op2
);
29471 if (!cmpdi_operand (*op1
, mode
))
29472 *op1
= force_reg (mode
, *op1
);
29473 if (!cmpdi_operand (*op2
, mode
))
29474 *op2
= force_reg (mode
, *op2
);
29478 if (!TARGET_VFP_FP16INST
)
29480 /* FP16 comparisons are done in SF mode. */
29482 *op1
= convert_to_mode (mode
, *op1
, 1);
29483 *op2
= convert_to_mode (mode
, *op2
, 1);
29484 /* Fall through. */
29487 if (!vfp_compare_operand (*op1
, mode
))
29488 *op1
= force_reg (mode
, *op1
);
29489 if (!vfp_compare_operand (*op2
, mode
))
29490 *op2
= force_reg (mode
, *op2
);
29500 /* Maximum number of instructions to set block of memory. */
29502 arm_block_set_max_insns (void)
29504 if (optimize_function_for_size_p (cfun
))
29507 return current_tune
->max_insns_inline_memset
;
29510 /* Return TRUE if it's profitable to set block of memory for
29511 non-vectorized case. VAL is the value to set the memory
29512 with. LENGTH is the number of bytes to set. ALIGN is the
29513 alignment of the destination memory in bytes. UNALIGNED_P
29514 is TRUE if we can only set the memory with instructions
29515 meeting alignment requirements. USE_STRD_P is TRUE if we
29516 can use strd to set the memory. */
29518 arm_block_set_non_vect_profit_p (rtx val
,
29519 unsigned HOST_WIDE_INT length
,
29520 unsigned HOST_WIDE_INT align
,
29521 bool unaligned_p
, bool use_strd_p
)
29524 /* For leftovers in bytes of 0-7, we can set the memory block using
29525 strb/strh/str with minimum instruction number. */
29526 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29530 num
= arm_const_inline_cost (SET
, val
);
29531 num
+= length
/ align
+ length
% align
;
29533 else if (use_strd_p
)
29535 num
= arm_const_double_inline_cost (val
);
29536 num
+= (length
>> 3) + leftover
[length
& 7];
29540 num
= arm_const_inline_cost (SET
, val
);
29541 num
+= (length
>> 2) + leftover
[length
& 3];
29544 /* We may be able to combine last pair STRH/STRB into a single STR
29545 by shifting one byte back. */
29546 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29549 return (num
<= arm_block_set_max_insns ());
29552 /* Return TRUE if it's profitable to set block of memory for
29553 vectorized case. LENGTH is the number of bytes to set.
29554 ALIGN is the alignment of destination memory in bytes.
29555 MODE is the vector mode used to set the memory. */
29557 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
29558 unsigned HOST_WIDE_INT align
,
29562 bool unaligned_p
= ((align
& 3) != 0);
29563 unsigned int nelt
= GET_MODE_NUNITS (mode
);
29565 /* Instruction loading constant value. */
29567 /* Instructions storing the memory. */
29568 num
+= (length
+ nelt
- 1) / nelt
;
29569 /* Instructions adjusting the address expression. Only need to
29570 adjust address expression if it's 4 bytes aligned and bytes
29571 leftover can only be stored by mis-aligned store instruction. */
29572 if (!unaligned_p
&& (length
& 3) != 0)
29575 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29576 if (!unaligned_p
&& mode
== V16QImode
)
29579 return (num
<= arm_block_set_max_insns ());
29582 /* Set a block of memory using vectorization instructions for the
29583 unaligned case. We fill the first LENGTH bytes of the memory
29584 area starting from DSTBASE with byte constant VALUE. ALIGN is
29585 the alignment requirement of memory. Return TRUE if succeeded. */
29587 arm_block_set_unaligned_vect (rtx dstbase
,
29588 unsigned HOST_WIDE_INT length
,
29589 unsigned HOST_WIDE_INT value
,
29590 unsigned HOST_WIDE_INT align
)
29592 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
29594 rtx val_elt
, val_vec
, reg
;
29595 rtx rval
[MAX_VECT_LEN
];
29596 rtx (*gen_func
) (rtx
, rtx
);
29598 unsigned HOST_WIDE_INT v
= value
;
29599 unsigned int offset
= 0;
29600 gcc_assert ((align
& 0x3) != 0);
29601 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29602 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29603 if (length
>= nelt_v16
)
29606 gen_func
= gen_movmisalignv16qi
;
29611 gen_func
= gen_movmisalignv8qi
;
29613 nelt_mode
= GET_MODE_NUNITS (mode
);
29614 gcc_assert (length
>= nelt_mode
);
29615 /* Skip if it isn't profitable. */
29616 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29619 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29620 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29622 v
= sext_hwi (v
, BITS_PER_WORD
);
29623 val_elt
= GEN_INT (v
);
29624 for (j
= 0; j
< nelt_mode
; j
++)
29627 reg
= gen_reg_rtx (mode
);
29628 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29629 /* Emit instruction loading the constant value. */
29630 emit_move_insn (reg
, val_vec
);
29632 /* Handle nelt_mode bytes in a vector. */
29633 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29635 emit_insn ((*gen_func
) (mem
, reg
));
29636 if (i
+ 2 * nelt_mode
<= length
)
29638 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
29639 offset
+= nelt_mode
;
29640 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29644 /* If there are not less than nelt_v8 bytes leftover, we must be in
29646 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
29648 /* Handle (8, 16) bytes leftover. */
29649 if (i
+ nelt_v8
< length
)
29651 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
29652 offset
+= length
- i
;
29653 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29655 /* We are shifting bytes back, set the alignment accordingly. */
29656 if ((length
& 1) != 0 && align
>= 2)
29657 set_mem_align (mem
, BITS_PER_UNIT
);
29659 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29661 /* Handle (0, 8] bytes leftover. */
29662 else if (i
< length
&& i
+ nelt_v8
>= length
)
29664 if (mode
== V16QImode
)
29665 reg
= gen_lowpart (V8QImode
, reg
);
29667 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
29668 + (nelt_mode
- nelt_v8
))));
29669 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
29670 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
29672 /* We are shifting bytes back, set the alignment accordingly. */
29673 if ((length
& 1) != 0 && align
>= 2)
29674 set_mem_align (mem
, BITS_PER_UNIT
);
29676 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29682 /* Set a block of memory using vectorization instructions for the
29683 aligned case. We fill the first LENGTH bytes of the memory area
29684 starting from DSTBASE with byte constant VALUE. ALIGN is the
29685 alignment requirement of memory. Return TRUE if succeeded. */
29687 arm_block_set_aligned_vect (rtx dstbase
,
29688 unsigned HOST_WIDE_INT length
,
29689 unsigned HOST_WIDE_INT value
,
29690 unsigned HOST_WIDE_INT align
)
29692 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
29693 rtx dst
, addr
, mem
;
29694 rtx val_elt
, val_vec
, reg
;
29695 rtx rval
[MAX_VECT_LEN
];
29697 unsigned HOST_WIDE_INT v
= value
;
29698 unsigned int offset
= 0;
29700 gcc_assert ((align
& 0x3) == 0);
29701 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29702 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29703 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
29708 nelt_mode
= GET_MODE_NUNITS (mode
);
29709 gcc_assert (length
>= nelt_mode
);
29710 /* Skip if it isn't profitable. */
29711 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29714 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29716 v
= sext_hwi (v
, BITS_PER_WORD
);
29717 val_elt
= GEN_INT (v
);
29718 for (j
= 0; j
< nelt_mode
; j
++)
29721 reg
= gen_reg_rtx (mode
);
29722 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29723 /* Emit instruction loading the constant value. */
29724 emit_move_insn (reg
, val_vec
);
29727 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29728 if (mode
== V16QImode
)
29730 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29731 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29733 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29734 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
29736 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29737 offset
+= length
- nelt_mode
;
29738 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29739 /* We are shifting bytes back, set the alignment accordingly. */
29740 if ((length
& 0x3) == 0)
29741 set_mem_align (mem
, BITS_PER_UNIT
* 4);
29742 else if ((length
& 0x1) == 0)
29743 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29745 set_mem_align (mem
, BITS_PER_UNIT
);
29747 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29750 /* Fall through for bytes leftover. */
29752 nelt_mode
= GET_MODE_NUNITS (mode
);
29753 reg
= gen_lowpart (V8QImode
, reg
);
29756 /* Handle 8 bytes in a vector. */
29757 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29759 addr
= plus_constant (Pmode
, dst
, i
);
29760 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
29761 emit_move_insn (mem
, reg
);
29764 /* Handle single word leftover by shifting 4 bytes back. We can
29765 use aligned access for this case. */
29766 if (i
+ UNITS_PER_WORD
== length
)
29768 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
29769 offset
+= i
- UNITS_PER_WORD
;
29770 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
29771 /* We are shifting 4 bytes back, set the alignment accordingly. */
29772 if (align
> UNITS_PER_WORD
)
29773 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
29775 emit_move_insn (mem
, reg
);
29777 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29778 We have to use unaligned access for this case. */
29779 else if (i
< length
)
29781 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29782 offset
+= length
- nelt_mode
;
29783 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29784 /* We are shifting bytes back, set the alignment accordingly. */
29785 if ((length
& 1) == 0)
29786 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29788 set_mem_align (mem
, BITS_PER_UNIT
);
29790 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29796 /* Set a block of memory using plain strh/strb instructions, only
29797 using instructions allowed by ALIGN on processor. We fill the
29798 first LENGTH bytes of the memory area starting from DSTBASE
29799 with byte constant VALUE. ALIGN is the alignment requirement
29802 arm_block_set_unaligned_non_vect (rtx dstbase
,
29803 unsigned HOST_WIDE_INT length
,
29804 unsigned HOST_WIDE_INT value
,
29805 unsigned HOST_WIDE_INT align
)
29808 rtx dst
, addr
, mem
;
29809 rtx val_exp
, val_reg
, reg
;
29811 HOST_WIDE_INT v
= value
;
29813 gcc_assert (align
== 1 || align
== 2);
29816 v
|= (value
<< BITS_PER_UNIT
);
29818 v
= sext_hwi (v
, BITS_PER_WORD
);
29819 val_exp
= GEN_INT (v
);
29820 /* Skip if it isn't profitable. */
29821 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29822 align
, true, false))
29825 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29826 mode
= (align
== 2 ? HImode
: QImode
);
29827 val_reg
= force_reg (SImode
, val_exp
);
29828 reg
= gen_lowpart (mode
, val_reg
);
29830 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
29832 addr
= plus_constant (Pmode
, dst
, i
);
29833 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
29834 emit_move_insn (mem
, reg
);
29837 /* Handle single byte leftover. */
29838 if (i
+ 1 == length
)
29840 reg
= gen_lowpart (QImode
, val_reg
);
29841 addr
= plus_constant (Pmode
, dst
, i
);
29842 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29843 emit_move_insn (mem
, reg
);
29847 gcc_assert (i
== length
);
29851 /* Set a block of memory using plain strd/str/strh/strb instructions,
29852 to permit unaligned copies on processors which support unaligned
29853 semantics for those instructions. We fill the first LENGTH bytes
29854 of the memory area starting from DSTBASE with byte constant VALUE.
29855 ALIGN is the alignment requirement of memory. */
29857 arm_block_set_aligned_non_vect (rtx dstbase
,
29858 unsigned HOST_WIDE_INT length
,
29859 unsigned HOST_WIDE_INT value
,
29860 unsigned HOST_WIDE_INT align
)
29863 rtx dst
, addr
, mem
;
29864 rtx val_exp
, val_reg
, reg
;
29865 unsigned HOST_WIDE_INT v
;
29868 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
29869 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
29871 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
29872 if (length
< UNITS_PER_WORD
)
29873 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
29876 v
|= (v
<< BITS_PER_WORD
);
29878 v
= sext_hwi (v
, BITS_PER_WORD
);
29880 val_exp
= GEN_INT (v
);
29881 /* Skip if it isn't profitable. */
29882 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29883 align
, false, use_strd_p
))
29888 /* Try without strd. */
29889 v
= (v
>> BITS_PER_WORD
);
29890 v
= sext_hwi (v
, BITS_PER_WORD
);
29891 val_exp
= GEN_INT (v
);
29892 use_strd_p
= false;
29893 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29894 align
, false, use_strd_p
))
29899 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29900 /* Handle double words using strd if possible. */
29903 val_reg
= force_reg (DImode
, val_exp
);
29905 for (; (i
+ 8 <= length
); i
+= 8)
29907 addr
= plus_constant (Pmode
, dst
, i
);
29908 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
29909 emit_move_insn (mem
, reg
);
29913 val_reg
= force_reg (SImode
, val_exp
);
29915 /* Handle words. */
29916 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
29917 for (; (i
+ 4 <= length
); i
+= 4)
29919 addr
= plus_constant (Pmode
, dst
, i
);
29920 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
29921 if ((align
& 3) == 0)
29922 emit_move_insn (mem
, reg
);
29924 emit_insn (gen_unaligned_storesi (mem
, reg
));
29927 /* Merge last pair of STRH and STRB into a STR if possible. */
29928 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
29930 addr
= plus_constant (Pmode
, dst
, i
- 1);
29931 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
29932 /* We are shifting one byte back, set the alignment accordingly. */
29933 if ((align
& 1) == 0)
29934 set_mem_align (mem
, BITS_PER_UNIT
);
29936 /* Most likely this is an unaligned access, and we can't tell at
29937 compilation time. */
29938 emit_insn (gen_unaligned_storesi (mem
, reg
));
29942 /* Handle half word leftover. */
29943 if (i
+ 2 <= length
)
29945 reg
= gen_lowpart (HImode
, val_reg
);
29946 addr
= plus_constant (Pmode
, dst
, i
);
29947 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
29948 if ((align
& 1) == 0)
29949 emit_move_insn (mem
, reg
);
29951 emit_insn (gen_unaligned_storehi (mem
, reg
));
29956 /* Handle single byte leftover. */
29957 if (i
+ 1 == length
)
29959 reg
= gen_lowpart (QImode
, val_reg
);
29960 addr
= plus_constant (Pmode
, dst
, i
);
29961 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29962 emit_move_insn (mem
, reg
);
29968 /* Set a block of memory using vectorization instructions for both
29969 aligned and unaligned cases. We fill the first LENGTH bytes of
29970 the memory area starting from DSTBASE with byte constant VALUE.
29971 ALIGN is the alignment requirement of memory. */
29973 arm_block_set_vect (rtx dstbase
,
29974 unsigned HOST_WIDE_INT length
,
29975 unsigned HOST_WIDE_INT value
,
29976 unsigned HOST_WIDE_INT align
)
29978 /* Check whether we need to use unaligned store instruction. */
29979 if (((align
& 3) != 0 || (length
& 3) != 0)
29980 /* Check whether unaligned store instruction is available. */
29981 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
29984 if ((align
& 3) == 0)
29985 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
29987 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
29990 /* Expand string store operation. Firstly we try to do that by using
29991 vectorization instructions, then try with ARM unaligned access and
29992 double-word store if profitable. OPERANDS[0] is the destination,
29993 OPERANDS[1] is the number of bytes, operands[2] is the value to
29994 initialize the memory, OPERANDS[3] is the known alignment of the
29997 arm_gen_setmem (rtx
*operands
)
29999 rtx dstbase
= operands
[0];
30000 unsigned HOST_WIDE_INT length
;
30001 unsigned HOST_WIDE_INT value
;
30002 unsigned HOST_WIDE_INT align
;
30004 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
30007 length
= UINTVAL (operands
[1]);
30011 value
= (UINTVAL (operands
[2]) & 0xFF);
30012 align
= UINTVAL (operands
[3]);
30013 if (TARGET_NEON
&& length
>= 8
30014 && current_tune
->string_ops_prefer_neon
30015 && arm_block_set_vect (dstbase
, length
, value
, align
))
30018 if (!unaligned_access
&& (align
& 3) != 0)
30019 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
30021 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
30026 arm_macro_fusion_p (void)
30028 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
30031 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30032 for MOVW / MOVT macro fusion. */
30035 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
30037 /* We are trying to fuse
30038 movw imm / movt imm
30039 instructions as a group that gets scheduled together. */
30041 rtx set_dest
= SET_DEST (curr_set
);
30043 if (GET_MODE (set_dest
) != SImode
)
30046 /* We are trying to match:
30047 prev (movw) == (set (reg r0) (const_int imm16))
30048 curr (movt) == (set (zero_extract (reg r0)
30051 (const_int imm16_1))
30053 prev (movw) == (set (reg r1)
30054 (high (symbol_ref ("SYM"))))
30055 curr (movt) == (set (reg r0)
30057 (symbol_ref ("SYM")))) */
30059 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
30061 if (CONST_INT_P (SET_SRC (curr_set
))
30062 && CONST_INT_P (SET_SRC (prev_set
))
30063 && REG_P (XEXP (set_dest
, 0))
30064 && REG_P (SET_DEST (prev_set
))
30065 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
30069 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
30070 && REG_P (SET_DEST (curr_set
))
30071 && REG_P (SET_DEST (prev_set
))
30072 && GET_CODE (SET_SRC (prev_set
)) == HIGH
30073 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
30080 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
30082 rtx prev_set
= single_set (prev
);
30083 rtx curr_set
= single_set (curr
);
30089 if (any_condjump_p (curr
))
30092 if (!arm_macro_fusion_p ())
30095 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
30096 && aarch_crypto_can_dual_issue (prev
, curr
))
30099 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
30100 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
30106 /* Return true iff the instruction fusion described by OP is enabled. */
30108 arm_fusion_enabled_p (tune_params::fuse_ops op
)
30110 return current_tune
->fusible_ops
& op
;
30113 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30114 scheduled for speculative execution. Reject the long-running division
30115 and square-root instructions. */
30118 arm_sched_can_speculate_insn (rtx_insn
*insn
)
30120 switch (get_attr_type (insn
))
30128 case TYPE_NEON_FP_SQRT_S
:
30129 case TYPE_NEON_FP_SQRT_D
:
30130 case TYPE_NEON_FP_SQRT_S_Q
:
30131 case TYPE_NEON_FP_SQRT_D_Q
:
30132 case TYPE_NEON_FP_DIV_S
:
30133 case TYPE_NEON_FP_DIV_D
:
30134 case TYPE_NEON_FP_DIV_S_Q
:
30135 case TYPE_NEON_FP_DIV_D_Q
:
30142 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30144 static unsigned HOST_WIDE_INT
30145 arm_asan_shadow_offset (void)
30147 return HOST_WIDE_INT_1U
<< 29;
30151 /* This is a temporary fix for PR60655. Ideally we need
30152 to handle most of these cases in the generic part but
30153 currently we reject minus (..) (sym_ref). We try to
30154 ameliorate the case with minus (sym_ref1) (sym_ref2)
30155 where they are in the same section. */
30158 arm_const_not_ok_for_debug_p (rtx p
)
30160 tree decl_op0
= NULL
;
30161 tree decl_op1
= NULL
;
30163 if (GET_CODE (p
) == MINUS
)
30165 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
30167 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
30169 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
30170 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
30172 if ((VAR_P (decl_op1
)
30173 || TREE_CODE (decl_op1
) == CONST_DECL
)
30174 && (VAR_P (decl_op0
)
30175 || TREE_CODE (decl_op0
) == CONST_DECL
))
30176 return (get_variable_section (decl_op1
, false)
30177 != get_variable_section (decl_op0
, false));
30179 if (TREE_CODE (decl_op1
) == LABEL_DECL
30180 && TREE_CODE (decl_op0
) == LABEL_DECL
)
30181 return (DECL_CONTEXT (decl_op1
)
30182 != DECL_CONTEXT (decl_op0
));
30192 /* return TRUE if x is a reference to a value in a constant pool */
30194 arm_is_constant_pool_ref (rtx x
)
30197 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
30198 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
30201 /* Remember the last target of arm_set_current_function. */
30202 static GTY(()) tree arm_previous_fndecl
;
30204 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30207 save_restore_target_globals (tree new_tree
)
30209 /* If we have a previous state, use it. */
30210 if (TREE_TARGET_GLOBALS (new_tree
))
30211 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
30212 else if (new_tree
== target_option_default_node
)
30213 restore_target_globals (&default_target_globals
);
30216 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30217 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
30220 arm_option_params_internal ();
30223 /* Invalidate arm_previous_fndecl. */
30226 arm_reset_previous_fndecl (void)
30228 arm_previous_fndecl
= NULL_TREE
;
30231 /* Establish appropriate back-end context for processing the function
30232 FNDECL. The argument might be NULL to indicate processing at top
30233 level, outside of any function scope. */
30236 arm_set_current_function (tree fndecl
)
30238 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
30241 tree old_tree
= (arm_previous_fndecl
30242 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
30245 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30247 /* If current function has no attributes but previous one did,
30248 use the default node. */
30249 if (! new_tree
&& old_tree
)
30250 new_tree
= target_option_default_node
;
30252 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30253 the default have been handled by save_restore_target_globals from
30254 arm_pragma_target_parse. */
30255 if (old_tree
== new_tree
)
30258 arm_previous_fndecl
= fndecl
;
30260 /* First set the target options. */
30261 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
30263 save_restore_target_globals (new_tree
);
30266 /* Implement TARGET_OPTION_PRINT. */
30269 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
30271 int flags
= ptr
->x_target_flags
;
30272 const char *fpu_name
;
30274 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
30275 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
30277 fprintf (file
, "%*sselected arch %s\n", indent
, "",
30278 TARGET_THUMB2_P (flags
) ? "thumb2" :
30279 TARGET_THUMB_P (flags
) ? "thumb1" :
30282 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
30285 /* Hook to determine if one function can safely inline another. */
30288 arm_can_inline_p (tree caller
, tree callee
)
30290 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30291 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30292 bool can_inline
= true;
30294 struct cl_target_option
*caller_opts
30295 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30296 : target_option_default_node
);
30298 struct cl_target_option
*callee_opts
30299 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30300 : target_option_default_node
);
30302 if (callee_opts
== caller_opts
)
30305 /* Callee's ISA features should be a subset of the caller's. */
30306 struct arm_build_target caller_target
;
30307 struct arm_build_target callee_target
;
30308 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
30309 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
30311 arm_configure_build_target (&caller_target
, caller_opts
, &global_options_set
,
30313 arm_configure_build_target (&callee_target
, callee_opts
, &global_options_set
,
30315 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
30316 can_inline
= false;
30318 sbitmap_free (caller_target
.isa
);
30319 sbitmap_free (callee_target
.isa
);
30321 /* OK to inline between different modes.
30322 Function with mode specific instructions, e.g using asm,
30323 must be explicitly protected with noinline. */
30327 /* Hook to fix function's alignment affected by target attribute. */
30330 arm_relayout_function (tree fndecl
)
30332 if (DECL_USER_ALIGN (fndecl
))
30335 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30338 callee_tree
= target_option_default_node
;
30340 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30343 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
30346 /* Inner function to process the attribute((target(...))), take an argument and
30347 set the current options from the argument. If we have a list, recursively
30348 go over the list. */
30351 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
30353 if (TREE_CODE (args
) == TREE_LIST
)
30357 for (; args
; args
= TREE_CHAIN (args
))
30358 if (TREE_VALUE (args
)
30359 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30364 else if (TREE_CODE (args
) != STRING_CST
)
30366 error ("attribute %<target%> argument not a string");
30370 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30373 while ((q
= strtok (argstr
, ",")) != NULL
)
30375 while (ISSPACE (*q
)) ++q
;
30378 if (!strncmp (q
, "thumb", 5))
30379 opts
->x_target_flags
|= MASK_THUMB
;
30381 else if (!strncmp (q
, "arm", 3))
30382 opts
->x_target_flags
&= ~MASK_THUMB
;
30384 else if (!strncmp (q
, "fpu=", 4))
30387 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
30388 &fpu_index
, CL_TARGET
))
30390 error ("invalid fpu for attribute(target(\"%s\"))", q
);
30393 if (fpu_index
== TARGET_FPU_auto
)
30395 /* This doesn't really make sense until we support
30396 general dynamic selection of the architecture and all
30398 sorry ("auto fpu selection not currently permitted here");
30401 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
30405 error ("attribute(target(\"%s\")) is unknown", q
);
30413 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30416 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
30417 struct gcc_options
*opts_set
)
30419 struct cl_target_option cl_opts
;
30421 if (!arm_valid_target_attribute_rec (args
, opts
))
30424 cl_target_option_save (&cl_opts
, opts
);
30425 arm_configure_build_target (&arm_active_target
, &cl_opts
, opts_set
, false);
30426 arm_option_check_internal (opts
);
30427 /* Do any overrides, such as global options arch=xxx. */
30428 arm_option_override_internal (opts
, opts_set
);
30430 return build_target_option_node (opts
);
30434 add_attribute (const char * mode
, tree
*attributes
)
30436 size_t len
= strlen (mode
);
30437 tree value
= build_string (len
, mode
);
30439 TREE_TYPE (value
) = build_array_type (char_type_node
,
30440 build_index_type (size_int (len
)));
30442 *attributes
= tree_cons (get_identifier ("target"),
30443 build_tree_list (NULL_TREE
, value
),
30447 /* For testing. Insert thumb or arm modes alternatively on functions. */
30450 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30454 if (! TARGET_FLIP_THUMB
)
30457 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30458 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30461 /* Nested definitions must inherit mode. */
30462 if (current_function_decl
)
30464 mode
= TARGET_THUMB
? "thumb" : "arm";
30465 add_attribute (mode
, attributes
);
30469 /* If there is already a setting don't change it. */
30470 if (lookup_attribute ("target", *attributes
) != NULL
)
30473 mode
= thumb_flipper
? "thumb" : "arm";
30474 add_attribute (mode
, attributes
);
30476 thumb_flipper
= !thumb_flipper
;
30479 /* Hook to validate attribute((target("string"))). */
30482 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30483 tree args
, int ARG_UNUSED (flags
))
30486 struct gcc_options func_options
;
30487 tree cur_tree
, new_optimize
;
30488 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
30490 /* Get the optimization options of the current function. */
30491 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
30493 /* If the function changed the optimization levels as well as setting target
30494 options, start with the optimizations specified. */
30495 if (!func_optimize
)
30496 func_optimize
= optimization_default_node
;
30498 /* Init func_options. */
30499 memset (&func_options
, 0, sizeof (func_options
));
30500 init_options_struct (&func_options
, NULL
);
30501 lang_hooks
.init_options_struct (&func_options
);
30503 /* Initialize func_options to the defaults. */
30504 cl_optimization_restore (&func_options
,
30505 TREE_OPTIMIZATION (func_optimize
));
30507 cl_target_option_restore (&func_options
,
30508 TREE_TARGET_OPTION (target_option_default_node
));
30510 /* Set func_options flags with new target mode. */
30511 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
30512 &global_options_set
);
30514 if (cur_tree
== NULL_TREE
)
30517 new_optimize
= build_optimization_node (&func_options
);
30519 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30521 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30523 finalize_options_struct (&func_options
);
30528 /* Match an ISA feature bitmap to a named FPU. We always use the
30529 first entry that exactly matches the feature set, so that we
30530 effectively canonicalize the FPU name for the assembler. */
30532 arm_identify_fpu_from_isa (sbitmap isa
)
30534 auto_sbitmap
fpubits (isa_num_bits
);
30535 auto_sbitmap
cand_fpubits (isa_num_bits
);
30537 bitmap_and (fpubits
, isa
, isa_all_fpubits
);
30539 /* If there are no ISA feature bits relating to the FPU, we must be
30540 doing soft-float. */
30541 if (bitmap_empty_p (fpubits
))
30544 for (unsigned int i
= 0; i
< ARRAY_SIZE (all_fpus
); i
++)
30546 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
30547 if (bitmap_equal_p (fpubits
, cand_fpubits
))
30548 return all_fpus
[i
].name
;
30550 /* We must find an entry, or things have gone wrong. */
30551 gcc_unreachable ();
30555 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
30558 fprintf (stream
, "\t.syntax unified\n");
30562 if (is_called_in_ARM_mode (decl
)
30563 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
30564 && cfun
->is_thunk
))
30565 fprintf (stream
, "\t.code 32\n");
30566 else if (TARGET_THUMB1
)
30567 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
30569 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
30572 fprintf (stream
, "\t.arm\n");
30574 asm_fprintf (asm_out_file
, "\t.fpu %s\n",
30577 : arm_identify_fpu_from_isa (arm_active_target
.isa
)));
30579 if (TARGET_POKE_FUNCTION_NAME
)
30580 arm_poke_function_name (stream
, (const char *) name
);
30583 /* If MEM is in the form of [base+offset], extract the two parts
30584 of address and set to BASE and OFFSET, otherwise return false
30585 after clearing BASE and OFFSET. */
30588 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
30592 gcc_assert (MEM_P (mem
));
30594 addr
= XEXP (mem
, 0);
30596 /* Strip off const from addresses like (const (addr)). */
30597 if (GET_CODE (addr
) == CONST
)
30598 addr
= XEXP (addr
, 0);
30600 if (GET_CODE (addr
) == REG
)
30603 *offset
= const0_rtx
;
30607 if (GET_CODE (addr
) == PLUS
30608 && GET_CODE (XEXP (addr
, 0)) == REG
30609 && CONST_INT_P (XEXP (addr
, 1)))
30611 *base
= XEXP (addr
, 0);
30612 *offset
= XEXP (addr
, 1);
30617 *offset
= NULL_RTX
;
30622 /* If INSN is a load or store of address in the form of [base+offset],
30623 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30624 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30625 otherwise return FALSE. */
30628 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
30632 gcc_assert (INSN_P (insn
));
30633 x
= PATTERN (insn
);
30634 if (GET_CODE (x
) != SET
)
30638 dest
= SET_DEST (x
);
30639 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
30642 extract_base_offset_in_addr (dest
, base
, offset
);
30644 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
30647 extract_base_offset_in_addr (src
, base
, offset
);
30652 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
30655 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30657 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30658 and PRI are only calculated for these instructions. For other instruction,
30659 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30660 instruction fusion can be supported by returning different priorities.
30662 It's important that irrelevant instructions get the largest FUSION_PRI. */
30665 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
30666 int *fusion_pri
, int *pri
)
30672 gcc_assert (INSN_P (insn
));
30675 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
30682 /* Load goes first. */
30684 *fusion_pri
= tmp
- 1;
30686 *fusion_pri
= tmp
- 2;
30690 /* INSN with smaller base register goes first. */
30691 tmp
-= ((REGNO (base
) & 0xff) << 20);
30693 /* INSN with smaller offset goes first. */
30694 off_val
= (int)(INTVAL (offset
));
30696 tmp
-= (off_val
& 0xfffff);
30698 tmp
+= ((- off_val
) & 0xfffff);
30705 /* Construct and return a PARALLEL RTX vector with elements numbering the
30706 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30707 the vector - from the perspective of the architecture. This does not
30708 line up with GCC's perspective on lane numbers, so we end up with
30709 different masks depending on our target endian-ness. The diagram
30710 below may help. We must draw the distinction when building masks
30711 which select one half of the vector. An instruction selecting
30712 architectural low-lanes for a big-endian target, must be described using
30713 a mask selecting GCC high-lanes.
30715 Big-Endian Little-Endian
30717 GCC 0 1 2 3 3 2 1 0
30718 | x | x | x | x | | x | x | x | x |
30719 Architecture 3 2 1 0 3 2 1 0
30721 Low Mask: { 2, 3 } { 0, 1 }
30722 High Mask: { 0, 1 } { 2, 3 }
30726 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
30728 int nunits
= GET_MODE_NUNITS (mode
);
30729 rtvec v
= rtvec_alloc (nunits
/ 2);
30730 int high_base
= nunits
/ 2;
30736 if (BYTES_BIG_ENDIAN
)
30737 base
= high
? low_base
: high_base
;
30739 base
= high
? high_base
: low_base
;
30741 for (i
= 0; i
< nunits
/ 2; i
++)
30742 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
30744 t1
= gen_rtx_PARALLEL (mode
, v
);
30748 /* Check OP for validity as a PARALLEL RTX vector with elements
30749 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30750 from the perspective of the architecture. See the diagram above
30751 arm_simd_vect_par_cnst_half_p for more details. */
30754 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
30757 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
30758 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
30759 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
30762 if (!VECTOR_MODE_P (mode
))
30765 if (count_op
!= count_ideal
)
30768 for (i
= 0; i
< count_ideal
; i
++)
30770 rtx elt_op
= XVECEXP (op
, 0, i
);
30771 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
30773 if (!CONST_INT_P (elt_op
)
30774 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
30780 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30783 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
30786 /* For now, we punt and not handle this for TARGET_THUMB1. */
30787 if (vcall_offset
&& TARGET_THUMB1
)
30790 /* Otherwise ok. */
30794 /* Generate RTL for a conditional branch with rtx comparison CODE in
30795 mode CC_MODE. The destination of the unlikely conditional branch
30799 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
30803 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
30804 gen_rtx_REG (cc_mode
, CC_REGNUM
),
30807 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30808 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
30810 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
30813 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30815 For pure-code sections there is no letter code for this attribute, so
30816 output all the section flags numerically when this is needed. */
30819 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
30822 if (flags
& SECTION_ARM_PURECODE
)
30826 if (!(flags
& SECTION_DEBUG
))
30828 if (flags
& SECTION_EXCLUDE
)
30829 *num
|= 0x80000000;
30830 if (flags
& SECTION_WRITE
)
30832 if (flags
& SECTION_CODE
)
30834 if (flags
& SECTION_MERGE
)
30836 if (flags
& SECTION_STRINGS
)
30838 if (flags
& SECTION_TLS
)
30840 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
30849 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30851 If pure-code is passed as an option, make sure all functions are in
30852 sections that have the SHF_ARM_PURECODE attribute. */
30855 arm_function_section (tree decl
, enum node_frequency freq
,
30856 bool startup
, bool exit
)
30858 const char * section_name
;
30861 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
30862 return default_function_section (decl
, freq
, startup
, exit
);
30864 if (!target_pure_code
)
30865 return default_function_section (decl
, freq
, startup
, exit
);
30868 section_name
= DECL_SECTION_NAME (decl
);
30870 /* If a function is not in a named section then it falls under the 'default'
30871 text section, also known as '.text'. We can preserve previous behavior as
30872 the default text section already has the SHF_ARM_PURECODE section
30876 section
*default_sec
= default_function_section (decl
, freq
, startup
,
30879 /* If default_sec is not null, then it must be a special section like for
30880 example .text.startup. We set the pure-code attribute and return the
30881 same section to preserve existing behavior. */
30883 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
30884 return default_sec
;
30887 /* Otherwise look whether a section has already been created with
30889 sec
= get_named_section (decl
, section_name
, 0);
30891 /* If that is not the case passing NULL as the section's name to
30892 'get_named_section' will create a section with the declaration's
30894 sec
= get_named_section (decl
, NULL
, 0);
30896 /* Set the SHF_ARM_PURECODE attribute. */
30897 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
30902 /* Implements the TARGET_SECTION_FLAGS hook.
30904 If DECL is a function declaration and pure-code is passed as an option
30905 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
30906 section's name and RELOC indicates whether the declarations initializer may
30907 contain runtime relocations. */
30909 static unsigned int
30910 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
30912 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
30914 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
30915 flags
|= SECTION_ARM_PURECODE
;
30920 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
30923 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
30925 rtx
*quot_p
, rtx
*rem_p
)
30927 if (mode
== SImode
)
30928 gcc_assert (!TARGET_IDIV
);
30930 machine_mode libval_mode
= smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode
),
30933 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
30935 op0
, GET_MODE (op0
),
30936 op1
, GET_MODE (op1
));
30938 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
30939 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
30940 GET_MODE_SIZE (mode
));
30942 gcc_assert (quotient
);
30943 gcc_assert (remainder
);
30945 *quot_p
= quotient
;
30946 *rem_p
= remainder
;
30949 /* This function checks for the availability of the coprocessor builtin passed
30950 in BUILTIN for the current target. Returns true if it is available and
30951 false otherwise. If a BUILTIN is passed for which this function has not
30952 been implemented it will cause an exception. */
30955 arm_coproc_builtin_available (enum unspecv builtin
)
30957 /* None of these builtins are available in Thumb mode if the target only
30958 supports Thumb-1. */
30976 case VUNSPEC_LDC2L
:
30978 case VUNSPEC_STC2L
:
30981 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
30988 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
30990 if (arm_arch6
|| arm_arch5te
)
30993 case VUNSPEC_MCRR2
:
30994 case VUNSPEC_MRRC2
:
30999 gcc_unreachable ();
31004 /* This function returns true if OP is a valid memory operand for the ldc and
31005 stc coprocessor instructions and false otherwise. */
31008 arm_coproc_ldc_stc_legitimate_address (rtx op
)
31010 HOST_WIDE_INT range
;
31011 /* Has to be a memory operand. */
31017 /* We accept registers. */
31021 switch GET_CODE (op
)
31025 /* Or registers with an offset. */
31026 if (!REG_P (XEXP (op
, 0)))
31031 /* The offset must be an immediate though. */
31032 if (!CONST_INT_P (op
))
31035 range
= INTVAL (op
);
31037 /* Within the range of [-1020,1020]. */
31038 if (!IN_RANGE (range
, -1020, 1020))
31041 /* And a multiple of 4. */
31042 return (range
% 4) == 0;
31048 return REG_P (XEXP (op
, 0));
31050 gcc_unreachable ();
31054 #include "gt-arm.h"