1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
34 #include "stringpool.h"
40 #include "diagnostic-core.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
61 #include "target-globals.h"
63 #include "tm-constrs.h"
65 #include "optabs-libfuncs.h"
67 /* This file should be included last. */
68 #include "target-def.h"
/* Forward definitions of types.  */

/* Nodes and fixups for the minipool (constant pool) machinery; the
   struct definitions appear later in this file.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook, settable by the language front end, called to emit
   language-specific object attributes (may be left NULL).  */
void (*arm_lang_output_object_attributes_hook)(void);
81 /* Forward function declarations. */
82 static bool arm_const_not_ok_for_debug_p (rtx
);
83 static bool arm_needs_doubleword_align (machine_mode
, const_tree
);
84 static int arm_compute_static_chain_stack_bytes (void);
85 static arm_stack_offsets
*arm_get_frame_offsets (void);
86 static void arm_add_gc_roots (void);
87 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
88 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
89 static unsigned bit_count (unsigned long);
90 static unsigned feature_count (const arm_feature_set
*);
91 static int arm_address_register_rtx_p (rtx
, int);
92 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
93 static bool is_called_in_ARM_mode (tree
);
94 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
95 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
96 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
97 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
98 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
99 inline static int thumb1_index_register_rtx_p (rtx
, int);
100 static int thumb_far_jump_used_p (void);
101 static bool thumb_force_lr_save (void);
102 static unsigned arm_size_return_regs (void);
103 static bool arm_assemble_integer (rtx
, unsigned int, int);
104 static void arm_print_operand (FILE *, rtx
, int);
105 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
106 static bool arm_print_operand_punct_valid_p (unsigned char code
);
107 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
108 static arm_cc
get_arm_condition_code (rtx
);
109 static const char *output_multi_immediate (rtx
*, const char *, const char *,
111 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
112 static struct machine_function
*arm_init_machine_status (void);
113 static void thumb_exit (FILE *, int);
114 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
115 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
116 static Mnode
*add_minipool_forward_ref (Mfix
*);
117 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
118 static Mnode
*add_minipool_backward_ref (Mfix
*);
119 static void assign_minipool_offsets (Mfix
*);
120 static void arm_print_value (FILE *, rtx
);
121 static void dump_minipool (rtx_insn
*);
122 static int arm_barrier_cost (rtx_insn
*);
123 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
124 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
125 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
127 static void arm_reorg (void);
128 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
129 static unsigned long arm_compute_save_reg0_reg12_mask (void);
130 static unsigned long arm_compute_save_reg_mask (void);
131 static unsigned long arm_isr_value (tree
);
132 static unsigned long arm_compute_func_type (void);
133 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
134 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
135 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
136 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
137 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
139 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
140 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
141 static int arm_comp_type_attributes (const_tree
, const_tree
);
142 static void arm_set_default_type_attributes (tree
);
143 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
144 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
145 static int optimal_immediate_sequence (enum rtx_code code
,
146 unsigned HOST_WIDE_INT val
,
147 struct four_ints
*return_sequence
);
148 static int optimal_immediate_sequence_1 (enum rtx_code code
,
149 unsigned HOST_WIDE_INT val
,
150 struct four_ints
*return_sequence
,
152 static int arm_get_strip_length (int);
153 static bool arm_function_ok_for_sibcall (tree
, tree
);
154 static machine_mode
arm_promote_function_mode (const_tree
,
157 static bool arm_return_in_memory (const_tree
, const_tree
);
158 static rtx
arm_function_value (const_tree
, const_tree
, bool);
159 static rtx
arm_libcall_value_1 (machine_mode
);
160 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
161 static bool arm_function_value_regno_p (const unsigned int);
162 static void arm_internal_label (FILE *, const char *, unsigned long);
163 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
165 static bool arm_have_conditional_execution (void);
166 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
167 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
168 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
169 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
170 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
171 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
172 static void emit_constant_insn (rtx cond
, rtx pattern
);
173 static rtx_insn
*emit_set_insn (rtx
, rtx
);
174 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
175 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
177 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
179 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
181 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
182 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
184 static rtx
aapcs_libcall_value (machine_mode
);
185 static int aapcs_select_return_coproc (const_tree
, const_tree
);
187 #ifdef OBJECT_FORMAT_ELF
188 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
189 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
192 static void arm_encode_section_info (tree
, rtx
, int);
195 static void arm_file_end (void);
196 static void arm_file_start (void);
197 static void arm_insert_attributes (tree
, tree
*);
199 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
201 static bool arm_pass_by_reference (cumulative_args_t
,
202 machine_mode
, const_tree
, bool);
203 static bool arm_promote_prototypes (const_tree
);
204 static bool arm_default_short_enums (void);
205 static bool arm_align_anon_bitfield (void);
206 static bool arm_return_in_msb (const_tree
);
207 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
208 static bool arm_return_in_memory (const_tree
, const_tree
);
210 static void arm_unwind_emit (FILE *, rtx_insn
*);
211 static bool arm_output_ttype (rtx
);
212 static void arm_asm_emit_except_personality (rtx
);
214 static void arm_asm_init_sections (void);
215 static rtx
arm_dwarf_register_span (rtx
);
217 static tree
arm_cxx_guard_type (void);
218 static bool arm_cxx_guard_mask_bit (void);
219 static tree
arm_get_cookie_size (tree
);
220 static bool arm_cookie_has_size (void);
221 static bool arm_cxx_cdtor_returns_this (void);
222 static bool arm_cxx_key_method_may_be_inline (void);
223 static void arm_cxx_determine_class_data_visibility (tree
);
224 static bool arm_cxx_class_data_always_comdat (void);
225 static bool arm_cxx_use_aeabi_atexit (void);
226 static void arm_init_libfuncs (void);
227 static tree
arm_build_builtin_va_list (void);
228 static void arm_expand_builtin_va_start (tree
, rtx
);
229 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
230 static void arm_option_override (void);
231 static void arm_override_options_after_change (void);
232 static void arm_option_print (FILE *, int, struct cl_target_option
*);
233 static void arm_set_current_function (tree
);
234 static bool arm_can_inline_p (tree
, tree
);
235 static void arm_relayout_function (tree
);
236 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
237 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
238 static bool arm_macro_fusion_p (void);
239 static bool arm_cannot_copy_insn_p (rtx_insn
*);
240 static int arm_issue_rate (void);
241 static int arm_first_cycle_multipass_dfa_lookahead (void);
242 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
243 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
244 static bool arm_output_addr_const_extra (FILE *, rtx
);
245 static bool arm_allocate_stack_slots_for_args (void);
246 static bool arm_warn_func_return (tree
);
247 static tree
arm_promoted_type (const_tree t
);
248 static tree
arm_convert_to_type (tree type
, tree expr
);
249 static bool arm_scalar_mode_supported_p (machine_mode
);
250 static bool arm_frame_pointer_required (void);
251 static bool arm_can_eliminate (const int, const int);
252 static void arm_asm_trampoline_template (FILE *);
253 static void arm_trampoline_init (rtx
, tree
, rtx
);
254 static rtx
arm_trampoline_adjust_address (rtx
);
255 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
256 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
257 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
258 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
259 static bool arm_array_mode_supported_p (machine_mode
,
260 unsigned HOST_WIDE_INT
);
261 static machine_mode
arm_preferred_simd_mode (machine_mode
);
262 static bool arm_class_likely_spilled_p (reg_class_t
);
263 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
264 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
265 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
269 static void arm_conditional_register_usage (void);
270 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
271 static unsigned int arm_autovectorize_vector_sizes (void);
272 static int arm_default_branch_cost (bool, bool);
273 static int arm_cortex_a5_branch_cost (bool, bool);
274 static int arm_cortex_m_branch_cost (bool, bool);
275 static int arm_cortex_m7_branch_cost (bool, bool);
277 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
278 const unsigned char *sel
);
280 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
282 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
284 int misalign ATTRIBUTE_UNUSED
);
285 static unsigned arm_add_stmt_cost (void *data
, int count
,
286 enum vect_cost_for_stmt kind
,
287 struct _stmt_vec_info
*stmt_info
,
289 enum vect_cost_model_location where
);
291 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
292 bool op0_preserve_value
);
293 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
295 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
296 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
298 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
299 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
300 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
302 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
304 /* Table of machine attributes. */
305 static const struct attribute_spec arm_attribute_table
[] =
307 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
308 affects_type_identity } */
309 /* Function calls made to this symbol must be done indirectly, because
310 it may lie outside of the 26 bit addressing range of a normal function
312 { "long_call", 0, 0, false, true, true, NULL
, false },
313 /* Whereas these functions are always known to reside within the 26 bit
315 { "short_call", 0, 0, false, true, true, NULL
, false },
316 /* Specify the procedure call conventions for a function. */
317 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
319 /* Interrupt Service Routines have special prologue and epilogue requirements. */
320 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
322 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
324 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
327 /* ARM/PE has three new attributes:
329 dllexport - for exporting a function/variable that will live in a dll
330 dllimport - for importing a function/variable from a dll
332 Microsoft allows multiple declspecs in one __declspec, separating
333 them with spaces. We do NOT support this. Instead, use __declspec
336 { "dllimport", 0, 0, true, false, false, NULL
, false },
337 { "dllexport", 0, 0, true, false, false, NULL
, false },
338 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
340 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
343 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
346 { NULL
, 0, 0, false, false, false, NULL
, false }
349 /* Initialize the GCC target structure. */
350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
351 #undef TARGET_MERGE_DECL_ATTRIBUTES
352 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
355 #undef TARGET_LEGITIMIZE_ADDRESS
356 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
358 #undef TARGET_ATTRIBUTE_TABLE
359 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
361 #undef TARGET_INSERT_ATTRIBUTES
362 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
364 #undef TARGET_ASM_FILE_START
365 #define TARGET_ASM_FILE_START arm_file_start
366 #undef TARGET_ASM_FILE_END
367 #define TARGET_ASM_FILE_END arm_file_end
369 #undef TARGET_ASM_ALIGNED_SI_OP
370 #define TARGET_ASM_ALIGNED_SI_OP NULL
371 #undef TARGET_ASM_INTEGER
372 #define TARGET_ASM_INTEGER arm_assemble_integer
374 #undef TARGET_PRINT_OPERAND
375 #define TARGET_PRINT_OPERAND arm_print_operand
376 #undef TARGET_PRINT_OPERAND_ADDRESS
377 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
378 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
379 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
381 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
382 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
384 #undef TARGET_ASM_FUNCTION_PROLOGUE
385 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
387 #undef TARGET_ASM_FUNCTION_EPILOGUE
388 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
390 #undef TARGET_CAN_INLINE_P
391 #define TARGET_CAN_INLINE_P arm_can_inline_p
393 #undef TARGET_RELAYOUT_FUNCTION
394 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
396 #undef TARGET_OPTION_OVERRIDE
397 #define TARGET_OPTION_OVERRIDE arm_option_override
399 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
400 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
402 #undef TARGET_OPTION_PRINT
403 #define TARGET_OPTION_PRINT arm_option_print
405 #undef TARGET_COMP_TYPE_ATTRIBUTES
406 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
408 #undef TARGET_SCHED_MACRO_FUSION_P
409 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
411 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
412 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
414 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
415 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
417 #undef TARGET_SCHED_ADJUST_COST
418 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
420 #undef TARGET_SET_CURRENT_FUNCTION
421 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
423 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
424 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
426 #undef TARGET_SCHED_REORDER
427 #define TARGET_SCHED_REORDER arm_sched_reorder
429 #undef TARGET_REGISTER_MOVE_COST
430 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
432 #undef TARGET_MEMORY_MOVE_COST
433 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif
442 #undef TARGET_STRIP_NAME_ENCODING
443 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
445 #undef TARGET_ASM_INTERNAL_LABEL
446 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
448 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
449 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
451 #undef TARGET_FUNCTION_VALUE
452 #define TARGET_FUNCTION_VALUE arm_function_value
454 #undef TARGET_LIBCALL_VALUE
455 #define TARGET_LIBCALL_VALUE arm_libcall_value
457 #undef TARGET_FUNCTION_VALUE_REGNO_P
458 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
460 #undef TARGET_ASM_OUTPUT_MI_THUNK
461 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
462 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
463 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
465 #undef TARGET_RTX_COSTS
466 #define TARGET_RTX_COSTS arm_rtx_costs
467 #undef TARGET_ADDRESS_COST
468 #define TARGET_ADDRESS_COST arm_address_cost
470 #undef TARGET_SHIFT_TRUNCATION_MASK
471 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
472 #undef TARGET_VECTOR_MODE_SUPPORTED_P
473 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
474 #undef TARGET_ARRAY_MODE_SUPPORTED_P
475 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
476 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
477 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
478 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
479 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
480 arm_autovectorize_vector_sizes
482 #undef TARGET_MACHINE_DEPENDENT_REORG
483 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
485 #undef TARGET_INIT_BUILTINS
486 #define TARGET_INIT_BUILTINS arm_init_builtins
487 #undef TARGET_EXPAND_BUILTIN
488 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
489 #undef TARGET_BUILTIN_DECL
490 #define TARGET_BUILTIN_DECL arm_builtin_decl
492 #undef TARGET_INIT_LIBFUNCS
493 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
495 #undef TARGET_PROMOTE_FUNCTION_MODE
496 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
497 #undef TARGET_PROMOTE_PROTOTYPES
498 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
499 #undef TARGET_PASS_BY_REFERENCE
500 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
501 #undef TARGET_ARG_PARTIAL_BYTES
502 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
503 #undef TARGET_FUNCTION_ARG
504 #define TARGET_FUNCTION_ARG arm_function_arg
505 #undef TARGET_FUNCTION_ARG_ADVANCE
506 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
507 #undef TARGET_FUNCTION_ARG_BOUNDARY
508 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
510 #undef TARGET_SETUP_INCOMING_VARARGS
511 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
513 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
514 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
516 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
517 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
518 #undef TARGET_TRAMPOLINE_INIT
519 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
520 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
521 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
523 #undef TARGET_WARN_FUNC_RETURN
524 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
526 #undef TARGET_DEFAULT_SHORT_ENUMS
527 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
529 #undef TARGET_ALIGN_ANON_BITFIELD
530 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
532 #undef TARGET_NARROW_VOLATILE_BITFIELD
533 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
535 #undef TARGET_CXX_GUARD_TYPE
536 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
538 #undef TARGET_CXX_GUARD_MASK_BIT
539 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
541 #undef TARGET_CXX_GET_COOKIE_SIZE
542 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
544 #undef TARGET_CXX_COOKIE_HAS_SIZE
545 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
547 #undef TARGET_CXX_CDTOR_RETURNS_THIS
548 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
550 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
551 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
553 #undef TARGET_CXX_USE_AEABI_ATEXIT
554 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
556 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
557 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
558 arm_cxx_determine_class_data_visibility
560 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
561 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
563 #undef TARGET_RETURN_IN_MSB
564 #define TARGET_RETURN_IN_MSB arm_return_in_msb
566 #undef TARGET_RETURN_IN_MEMORY
567 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
569 #undef TARGET_MUST_PASS_IN_STACK
570 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
573 #undef TARGET_ASM_UNWIND_EMIT
574 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
576 /* EABI unwinding tables use a different format for the typeinfo tables. */
577 #undef TARGET_ASM_TTYPE
578 #define TARGET_ASM_TTYPE arm_output_ttype
580 #undef TARGET_ARM_EABI_UNWINDER
581 #define TARGET_ARM_EABI_UNWINDER true
583 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
584 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
586 #undef TARGET_ASM_INIT_SECTIONS
587 #endif /* ARM_UNWIND_INFO */
588 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
590 #undef TARGET_DWARF_REGISTER_SPAN
591 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
593 #undef TARGET_CANNOT_COPY_INSN_P
594 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
597 #undef TARGET_HAVE_TLS
598 #define TARGET_HAVE_TLS true
601 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
602 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
604 #undef TARGET_LEGITIMATE_CONSTANT_P
605 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
607 #undef TARGET_CANNOT_FORCE_CONST_MEM
608 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
610 #undef TARGET_MAX_ANCHOR_OFFSET
611 #define TARGET_MAX_ANCHOR_OFFSET 4095
613 /* The minimum is set such that the total size of the block
614 for a particular anchor is -4088 + 1 + 4095 bytes, which is
615 divisible by eight, ensuring natural spacing of anchors. */
616 #undef TARGET_MIN_ANCHOR_OFFSET
617 #define TARGET_MIN_ANCHOR_OFFSET -4088
619 #undef TARGET_SCHED_ISSUE_RATE
620 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
622 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
623 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
624 arm_first_cycle_multipass_dfa_lookahead
626 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
627 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
628 arm_first_cycle_multipass_dfa_lookahead_guard
630 #undef TARGET_MANGLE_TYPE
631 #define TARGET_MANGLE_TYPE arm_mangle_type
633 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
634 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
636 #undef TARGET_BUILD_BUILTIN_VA_LIST
637 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
638 #undef TARGET_EXPAND_BUILTIN_VA_START
639 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
640 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
641 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
644 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
645 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
648 #undef TARGET_LEGITIMATE_ADDRESS_P
649 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
651 #undef TARGET_PREFERRED_RELOAD_CLASS
652 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
654 #undef TARGET_PROMOTED_TYPE
655 #define TARGET_PROMOTED_TYPE arm_promoted_type
657 #undef TARGET_CONVERT_TO_TYPE
658 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
660 #undef TARGET_SCALAR_MODE_SUPPORTED_P
661 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
663 #undef TARGET_FRAME_POINTER_REQUIRED
664 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
666 #undef TARGET_CAN_ELIMINATE
667 #define TARGET_CAN_ELIMINATE arm_can_eliminate
669 #undef TARGET_CONDITIONAL_REGISTER_USAGE
670 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
672 #undef TARGET_CLASS_LIKELY_SPILLED_P
673 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
675 #undef TARGET_VECTORIZE_BUILTINS
676 #define TARGET_VECTORIZE_BUILTINS
678 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
679 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
680 arm_builtin_vectorized_function
682 #undef TARGET_VECTOR_ALIGNMENT
683 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
685 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
686 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
687 arm_vector_alignment_reachable
689 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
690 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
691 arm_builtin_support_vector_misalignment
693 #undef TARGET_PREFERRED_RENAME_CLASS
694 #define TARGET_PREFERRED_RENAME_CLASS \
695 arm_preferred_rename_class
697 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
698 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
699 arm_vectorize_vec_perm_const_ok
701 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
702 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
703 arm_builtin_vectorization_cost
704 #undef TARGET_VECTORIZE_ADD_STMT_COST
705 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
707 #undef TARGET_CANONICALIZE_COMPARISON
708 #define TARGET_CANONICALIZE_COMPARISON \
709 arm_canonicalize_comparison
711 #undef TARGET_ASAN_SHADOW_OFFSET
712 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
714 #undef MAX_INSN_PER_IT_BLOCK
715 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
717 #undef TARGET_CAN_USE_DOLOOP_P
718 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
720 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
721 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
723 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
724 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
726 #undef TARGET_SCHED_FUSION_PRIORITY
727 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
729 #undef TARGET_ASM_FUNCTION_SECTION
730 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
732 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
733 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
735 #undef TARGET_SECTION_TYPE_FLAGS
736 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
738 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
739 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
741 struct gcc_target targetm
= TARGET_INITIALIZER
;
743 /* Obstack for minipool constant handling. */
744 static struct obstack minipool_obstack
;
745 static char * minipool_startobj
;
747 /* The maximum number of insns skipped which
748 will be conditionalised if possible. */
749 static int max_insns_skipped
= 5;
751 extern FILE * asm_out_file
;
753 /* True if we are currently building a constant table. */
754 int making_const_table
;
756 /* The processor for which instructions should be scheduled. */
757 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
759 /* The current tuning set. */
760 const struct tune_params
*current_tune
;
762 /* Which floating point hardware to schedule for. */
765 /* Used for Thumb call_via trampolines. */
766 rtx thumb_call_via_label
[14];
767 static int thumb_call_reg_needed
;
769 /* The bits in this mask specify which
770 instructions we are allowed to generate. */
771 arm_feature_set insn_flags
= ARM_FSET_EMPTY
;
773 /* The bits in this mask specify which instruction scheduling options should
775 arm_feature_set tune_flags
= ARM_FSET_EMPTY
;
777 /* The highest ARM architecture version supported by the
779 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.
   NOTE(review): several of the `int arm_arch* = 0;' definitions below
   were lost in extraction and have been reconstructed from the
   surviving comments -- confirm against the upstream file.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;
878 /* Nonzero if we should use Neon to handle 64-bits operations rather
879 than core registers. */
880 int prefer_neon_for_64bits
= 0;
882 /* Nonzero if we shouldn't use literal pools. */
883 bool arm_disable_literal_pool
= false;
885 /* The register number to be used for the PIC offset register. */
886 unsigned arm_pic_register
= INVALID_REGNUM
;
888 enum arm_pcs arm_pcs_default
;
890 /* For an explanation of these variables, see final_prescan_insn below. */
892 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
893 enum arm_cond_code arm_current_cc
;
896 int arm_target_label
;
897 /* The number of conditionally executed insns, including the current insn. */
898 int arm_condexec_count
= 0;
899 /* A bitmask specifying the patterns for the IT block.
900 Zero means do not output an IT block before this insn. */
901 int arm_condexec_mask
= 0;
902 /* The number of bits used in arm_condexec_mask. */
903 int arm_condexec_masklen
= 0;
905 /* Nonzero if chip supports the ARMv8 CRC instructions. */
906 int arm_arch_crc
= 0;
908 /* Nonzero if the core has a very small, high-latency, multiply unit. */
909 int arm_m_profile_small_mul
= 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/* Canonical mnemonic for logical-shift-left in output templates.  */
#define ARM_LSL_NAME "lsl"
/* True iff the two NUL-terminated strings compare equal.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Low registers usable as Thumb-2 work registers: r0-r7 minus the frame
   pointer, stack pointer, program counter and PIC register.  */
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
931 /* Initialization code. */
935 const char *const name
;
936 enum processor_type core
;
938 enum base_architecture base_arch
;
939 const arm_feature_set flags
;
940 const struct tune_params
*const tune
;
/* Prefetch parameter triples: { num_slots, l1_cache_size, l1_cache_line_size }.
   -1 means "use the default".  NOTE(review): the continuation body of
   ARM_PREFETCH_BENEFICIAL was dropped in extraction and has been restored.  */
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { num_slots, l1_size, l1_line_size }
952 /* arm generic vectorizer costs. */
954 struct cpu_vec_costs arm_default_vec_cost
= {
955 1, /* scalar_stmt_cost. */
956 1, /* scalar load_cost. */
957 1, /* scalar_store_cost. */
958 1, /* vec_stmt_cost. */
959 1, /* vec_to_scalar_cost. */
960 1, /* scalar_to_vec_cost. */
961 1, /* vec_align_load_cost. */
962 1, /* vec_unalign_load_cost. */
963 1, /* vec_unalign_store_cost. */
964 1, /* vec_store_cost. */
965 3, /* cond_taken_branch_cost. */
966 1, /* cond_not_taken_branch_cost. */
969 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
970 #include "aarch-cost-tables.h"
/* NOTE(review): extraction damage — the braces and several interior field
   lines of this initializer are missing and stray line numbers are fused
   into the text.  Values left untouched; restore from upstream GCC before
   building.  */
974 const struct cpu_cost_table cortexa9_extra_costs
=
981 COSTS_N_INSNS (1),	/* shift_reg.  */
982 COSTS_N_INSNS (1),	/* arith_shift.  */
983 COSTS_N_INSNS (2),	/* arith_shift_reg.  */
985 COSTS_N_INSNS (1),	/* log_shift_reg.  */
986 COSTS_N_INSNS (1),	/* extend.  */
987 COSTS_N_INSNS (2),	/* extend_arith.  */
988 COSTS_N_INSNS (1),	/* bfi.  */
989 COSTS_N_INSNS (1),	/* bfx.  */
993 true		/* non_exec_costs_exec.  */
998 COSTS_N_INSNS (3),	/* simple.  */
999 COSTS_N_INSNS (3),	/* flag_setting.  */
1000 COSTS_N_INSNS (2),	/* extend.  */
1001 COSTS_N_INSNS (3),	/* add.  */
1002 COSTS_N_INSNS (2),	/* extend_add.  */
1003 COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
1007 0,			/* simple (N/A).  */
1008 0,			/* flag_setting (N/A).  */
1009 COSTS_N_INSNS (4),	/* extend.  */
1011 COSTS_N_INSNS (4),	/* extend_add.  */
1017 COSTS_N_INSNS (2),	/* load.  */
1018 COSTS_N_INSNS (2),	/* load_sign_extend.  */
1019 COSTS_N_INSNS (2),	/* ldrd.  */
1020 COSTS_N_INSNS (2),	/* ldm_1st.  */
1021 1,			/* ldm_regs_per_insn_1st.  */
1022 2,			/* ldm_regs_per_insn_subsequent.  */
1023 COSTS_N_INSNS (5),	/* loadf.  */
1024 COSTS_N_INSNS (5),	/* loadd.  */
1025 COSTS_N_INSNS (1),	/* load_unaligned.  */
1026 COSTS_N_INSNS (2),	/* store.  */
1027 COSTS_N_INSNS (2),	/* strd.  */
1028 COSTS_N_INSNS (2),	/* stm_1st.  */
1029 1,			/* stm_regs_per_insn_1st.  */
1030 2,			/* stm_regs_per_insn_subsequent.  */
1031 COSTS_N_INSNS (1),	/* storef.  */
1032 COSTS_N_INSNS (1),	/* stored.  */
1033 COSTS_N_INSNS (1),	/* store_unaligned.  */
1034 COSTS_N_INSNS (1),	/* loadv.  */
1035 COSTS_N_INSNS (1)	/* storev.  */
1040 COSTS_N_INSNS (14),	/* div.  */
1041 COSTS_N_INSNS (4),	/* mult.  */
1042 COSTS_N_INSNS (7),	/* mult_addsub. */
1043 COSTS_N_INSNS (30),	/* fma.  */
1044 COSTS_N_INSNS (3),	/* addsub.  */
1045 COSTS_N_INSNS (1),	/* fpconst.  */
1046 COSTS_N_INSNS (1),	/* neg.  */
1047 COSTS_N_INSNS (3),	/* compare.  */
1048 COSTS_N_INSNS (3),	/* widen.  */
1049 COSTS_N_INSNS (3),	/* narrow.  */
1050 COSTS_N_INSNS (3),	/* toint.  */
1051 COSTS_N_INSNS (3),	/* fromint.  */
1052 COSTS_N_INSNS (3)	/* roundint.  */
1056 COSTS_N_INSNS (24),	/* div.  */
1057 COSTS_N_INSNS (5),	/* mult.  */
1058 COSTS_N_INSNS (8),	/* mult_addsub.  */
1059 COSTS_N_INSNS (30),	/* fma.  */
1060 COSTS_N_INSNS (3),	/* addsub.  */
1061 COSTS_N_INSNS (1),	/* fpconst.  */
1062 COSTS_N_INSNS (1),	/* neg.  */
1063 COSTS_N_INSNS (3),	/* compare.  */
1064 COSTS_N_INSNS (3),	/* widen.  */
1065 COSTS_N_INSNS (3),	/* narrow.  */
1066 COSTS_N_INSNS (3),	/* toint.  */
1067 COSTS_N_INSNS (3),	/* fromint.  */
1068 COSTS_N_INSNS (3)	/* roundint.  */
1073 COSTS_N_INSNS (1)	/* alu.  */
/* NOTE(review): extraction damage — the braces and several interior field
   lines of this initializer are missing and stray line numbers are fused
   into the text.  Values left untouched; restore from upstream GCC before
   building.  */
1077 const struct cpu_cost_table cortexa8_extra_costs
=
1083 COSTS_N_INSNS (1),	/* shift.  */
1085 COSTS_N_INSNS (1),	/* arith_shift.  */
1086 0,			/* arith_shift_reg.  */
1087 COSTS_N_INSNS (1),	/* log_shift.  */
1088 0,			/* log_shift_reg.  */
1090 0,			/* extend_arith.  */
1096 true		/* non_exec_costs_exec.  */
1101 COSTS_N_INSNS (1),	/* simple.  */
1102 COSTS_N_INSNS (1),	/* flag_setting.  */
1103 COSTS_N_INSNS (1),	/* extend.  */
1104 COSTS_N_INSNS (1),	/* add.  */
1105 COSTS_N_INSNS (1),	/* extend_add.  */
1106 COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
1110 0,			/* simple (N/A).  */
1111 0,			/* flag_setting (N/A).  */
1112 COSTS_N_INSNS (2),	/* extend.  */
1114 COSTS_N_INSNS (2),	/* extend_add.  */
1120 COSTS_N_INSNS (1),	/* load.  */
1121 COSTS_N_INSNS (1),	/* load_sign_extend.  */
1122 COSTS_N_INSNS (1),	/* ldrd.  */
1123 COSTS_N_INSNS (1),	/* ldm_1st.  */
1124 1,			/* ldm_regs_per_insn_1st.  */
1125 2,			/* ldm_regs_per_insn_subsequent.  */
1126 COSTS_N_INSNS (1),	/* loadf.  */
1127 COSTS_N_INSNS (1),	/* loadd.  */
1128 COSTS_N_INSNS (1),	/* load_unaligned.  */
1129 COSTS_N_INSNS (1),	/* store.  */
1130 COSTS_N_INSNS (1),	/* strd.  */
1131 COSTS_N_INSNS (1),	/* stm_1st.  */
1132 1,			/* stm_regs_per_insn_1st.  */
1133 2,			/* stm_regs_per_insn_subsequent.  */
1134 COSTS_N_INSNS (1),	/* storef.  */
1135 COSTS_N_INSNS (1),	/* stored.  */
1136 COSTS_N_INSNS (1),	/* store_unaligned.  */
1137 COSTS_N_INSNS (1),	/* loadv.  */
1138 COSTS_N_INSNS (1)	/* storev.  */
1143 COSTS_N_INSNS (36),	/* div.  */
1144 COSTS_N_INSNS (11),	/* mult.  */
1145 COSTS_N_INSNS (20),	/* mult_addsub. */
1146 COSTS_N_INSNS (30),	/* fma.  */
1147 COSTS_N_INSNS (9),	/* addsub.  */
1148 COSTS_N_INSNS (3),	/* fpconst.  */
1149 COSTS_N_INSNS (3),	/* neg.  */
1150 COSTS_N_INSNS (6),	/* compare.  */
1151 COSTS_N_INSNS (4),	/* widen.  */
1152 COSTS_N_INSNS (4),	/* narrow.  */
1153 COSTS_N_INSNS (8),	/* toint.  */
1154 COSTS_N_INSNS (8),	/* fromint.  */
1155 COSTS_N_INSNS (8)	/* roundint.  */
1159 COSTS_N_INSNS (64),	/* div.  */
1160 COSTS_N_INSNS (16),	/* mult.  */
1161 COSTS_N_INSNS (25),	/* mult_addsub.  */
1162 COSTS_N_INSNS (30),	/* fma.  */
1163 COSTS_N_INSNS (9),	/* addsub.  */
1164 COSTS_N_INSNS (3),	/* fpconst.  */
1165 COSTS_N_INSNS (3),	/* neg.  */
1166 COSTS_N_INSNS (6),	/* compare.  */
1167 COSTS_N_INSNS (6),	/* widen.  */
1168 COSTS_N_INSNS (6),	/* narrow.  */
1169 COSTS_N_INSNS (8),	/* toint.  */
1170 COSTS_N_INSNS (8),	/* fromint.  */
1171 COSTS_N_INSNS (8)	/* roundint.  */
1176 COSTS_N_INSNS (1)	/* alu.  */
/* NOTE(review): extraction damage — the braces and several interior field
   lines of this initializer are missing and stray line numbers are fused
   into the text.  Values left untouched; restore from upstream GCC before
   building.  */
1180 const struct cpu_cost_table cortexa5_extra_costs
=
1186 COSTS_N_INSNS (1),	/* shift.  */
1187 COSTS_N_INSNS (1),	/* shift_reg.  */
1188 COSTS_N_INSNS (1),	/* arith_shift.  */
1189 COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1190 COSTS_N_INSNS (1),	/* log_shift.  */
1191 COSTS_N_INSNS (1),	/* log_shift_reg.  */
1192 COSTS_N_INSNS (1),	/* extend.  */
1193 COSTS_N_INSNS (1),	/* extend_arith.  */
1194 COSTS_N_INSNS (1),	/* bfi.  */
1195 COSTS_N_INSNS (1),	/* bfx.  */
1196 COSTS_N_INSNS (1),	/* clz.  */
1197 COSTS_N_INSNS (1),	/* rev.  */
1199 true		/* non_exec_costs_exec.  */
1206 COSTS_N_INSNS (1),	/* flag_setting.  */
1207 COSTS_N_INSNS (1),	/* extend.  */
1208 COSTS_N_INSNS (1),	/* add.  */
1209 COSTS_N_INSNS (1),	/* extend_add.  */
1210 COSTS_N_INSNS (7)	/* idiv.  */
1214 0,			/* simple (N/A).  */
1215 0,			/* flag_setting (N/A).  */
1216 COSTS_N_INSNS (1),	/* extend.  */
1218 COSTS_N_INSNS (2),	/* extend_add.  */
1224 COSTS_N_INSNS (1),	/* load.  */
1225 COSTS_N_INSNS (1),	/* load_sign_extend.  */
1226 COSTS_N_INSNS (6),	/* ldrd.  */
1227 COSTS_N_INSNS (1),	/* ldm_1st.  */
1228 1,			/* ldm_regs_per_insn_1st.  */
1229 2,			/* ldm_regs_per_insn_subsequent.  */
1230 COSTS_N_INSNS (2),	/* loadf.  */
1231 COSTS_N_INSNS (4),	/* loadd.  */
1232 COSTS_N_INSNS (1),	/* load_unaligned.  */
1233 COSTS_N_INSNS (1),	/* store.  */
1234 COSTS_N_INSNS (3),	/* strd.  */
1235 COSTS_N_INSNS (1),	/* stm_1st.  */
1236 1,			/* stm_regs_per_insn_1st.  */
1237 2,			/* stm_regs_per_insn_subsequent.  */
1238 COSTS_N_INSNS (2),	/* storef.  */
1239 COSTS_N_INSNS (2),	/* stored.  */
1240 COSTS_N_INSNS (1),	/* store_unaligned.  */
1241 COSTS_N_INSNS (1),	/* loadv.  */
1242 COSTS_N_INSNS (1)	/* storev.  */
1247 COSTS_N_INSNS (15),	/* div.  */
1248 COSTS_N_INSNS (3),	/* mult.  */
1249 COSTS_N_INSNS (7),	/* mult_addsub. */
1250 COSTS_N_INSNS (7),	/* fma.  */
1251 COSTS_N_INSNS (3),	/* addsub.  */
1252 COSTS_N_INSNS (3),	/* fpconst.  */
1253 COSTS_N_INSNS (3),	/* neg.  */
1254 COSTS_N_INSNS (3),	/* compare.  */
1255 COSTS_N_INSNS (3),	/* widen.  */
1256 COSTS_N_INSNS (3),	/* narrow.  */
1257 COSTS_N_INSNS (3),	/* toint.  */
1258 COSTS_N_INSNS (3),	/* fromint.  */
1259 COSTS_N_INSNS (3)	/* roundint.  */
1263 COSTS_N_INSNS (30),	/* div.  */
1264 COSTS_N_INSNS (6),	/* mult.  */
1265 COSTS_N_INSNS (10),	/* mult_addsub.  */
1266 COSTS_N_INSNS (7),	/* fma.  */
1267 COSTS_N_INSNS (3),	/* addsub.  */
1268 COSTS_N_INSNS (3),	/* fpconst.  */
1269 COSTS_N_INSNS (3),	/* neg.  */
1270 COSTS_N_INSNS (3),	/* compare.  */
1271 COSTS_N_INSNS (3),	/* widen.  */
1272 COSTS_N_INSNS (3),	/* narrow.  */
1273 COSTS_N_INSNS (3),	/* toint.  */
1274 COSTS_N_INSNS (3),	/* fromint.  */
1275 COSTS_N_INSNS (3)	/* roundint.  */
1280 COSTS_N_INSNS (1)	/* alu.  */
/* NOTE(review): extraction damage — the braces and several interior field
   lines of this initializer are missing and stray line numbers are fused
   into the text.  Values left untouched; restore from upstream GCC before
   building.  */
1285 const struct cpu_cost_table cortexa7_extra_costs
=
1291 COSTS_N_INSNS (1),	/* shift.  */
1292 COSTS_N_INSNS (1),	/* shift_reg.  */
1293 COSTS_N_INSNS (1),	/* arith_shift.  */
1294 COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1295 COSTS_N_INSNS (1),	/* log_shift.  */
1296 COSTS_N_INSNS (1),	/* log_shift_reg.  */
1297 COSTS_N_INSNS (1),	/* extend.  */
1298 COSTS_N_INSNS (1),	/* extend_arith.  */
1299 COSTS_N_INSNS (1),	/* bfi.  */
1300 COSTS_N_INSNS (1),	/* bfx.  */
1301 COSTS_N_INSNS (1),	/* clz.  */
1302 COSTS_N_INSNS (1),	/* rev.  */
1304 true		/* non_exec_costs_exec.  */
1311 COSTS_N_INSNS (1),	/* flag_setting.  */
1312 COSTS_N_INSNS (1),	/* extend.  */
1313 COSTS_N_INSNS (1),	/* add.  */
1314 COSTS_N_INSNS (1),	/* extend_add.  */
1315 COSTS_N_INSNS (7)	/* idiv.  */
1319 0,			/* simple (N/A).  */
1320 0,			/* flag_setting (N/A).  */
1321 COSTS_N_INSNS (1),	/* extend.  */
1323 COSTS_N_INSNS (2),	/* extend_add.  */
1329 COSTS_N_INSNS (1),	/* load.  */
1330 COSTS_N_INSNS (1),	/* load_sign_extend.  */
1331 COSTS_N_INSNS (3),	/* ldrd.  */
1332 COSTS_N_INSNS (1),	/* ldm_1st.  */
1333 1,			/* ldm_regs_per_insn_1st.  */
1334 2,			/* ldm_regs_per_insn_subsequent.  */
1335 COSTS_N_INSNS (2),	/* loadf.  */
1336 COSTS_N_INSNS (2),	/* loadd.  */
1337 COSTS_N_INSNS (1),	/* load_unaligned.  */
1338 COSTS_N_INSNS (1),	/* store.  */
1339 COSTS_N_INSNS (3),	/* strd.  */
1340 COSTS_N_INSNS (1),	/* stm_1st.  */
1341 1,			/* stm_regs_per_insn_1st.  */
1342 2,			/* stm_regs_per_insn_subsequent.  */
1343 COSTS_N_INSNS (2),	/* storef.  */
1344 COSTS_N_INSNS (2),	/* stored.  */
1345 COSTS_N_INSNS (1),	/* store_unaligned.  */
1346 COSTS_N_INSNS (1),	/* loadv.  */
1347 COSTS_N_INSNS (1)	/* storev.  */
1352 COSTS_N_INSNS (15),	/* div.  */
1353 COSTS_N_INSNS (3),	/* mult.  */
1354 COSTS_N_INSNS (7),	/* mult_addsub. */
1355 COSTS_N_INSNS (7),	/* fma.  */
1356 COSTS_N_INSNS (3),	/* addsub.  */
1357 COSTS_N_INSNS (3),	/* fpconst.  */
1358 COSTS_N_INSNS (3),	/* neg.  */
1359 COSTS_N_INSNS (3),	/* compare.  */
1360 COSTS_N_INSNS (3),	/* widen.  */
1361 COSTS_N_INSNS (3),	/* narrow.  */
1362 COSTS_N_INSNS (3),	/* toint.  */
1363 COSTS_N_INSNS (3),	/* fromint.  */
1364 COSTS_N_INSNS (3)	/* roundint.  */
1368 COSTS_N_INSNS (30),	/* div.  */
1369 COSTS_N_INSNS (6),	/* mult.  */
1370 COSTS_N_INSNS (10),	/* mult_addsub.  */
1371 COSTS_N_INSNS (7),	/* fma.  */
1372 COSTS_N_INSNS (3),	/* addsub.  */
1373 COSTS_N_INSNS (3),	/* fpconst.  */
1374 COSTS_N_INSNS (3),	/* neg.  */
1375 COSTS_N_INSNS (3),	/* compare.  */
1376 COSTS_N_INSNS (3),	/* widen.  */
1377 COSTS_N_INSNS (3),	/* narrow.  */
1378 COSTS_N_INSNS (3),	/* toint.  */
1379 COSTS_N_INSNS (3),	/* fromint.  */
1380 COSTS_N_INSNS (3)	/* roundint.  */
1385 COSTS_N_INSNS (1)	/* alu.  */
/* NOTE(review): extraction damage — the braces and several interior field
   lines of this initializer are missing and stray line numbers are fused
   into the text.  Values left untouched; restore from upstream GCC before
   building.  */
1389 const struct cpu_cost_table cortexa12_extra_costs
=
1396 COSTS_N_INSNS (1),	/* shift_reg.  */
1397 COSTS_N_INSNS (1),	/* arith_shift.  */
1398 COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1399 COSTS_N_INSNS (1),	/* log_shift.  */
1400 COSTS_N_INSNS (1),	/* log_shift_reg.  */
1402 COSTS_N_INSNS (1),	/* extend_arith.  */
1404 COSTS_N_INSNS (1),	/* bfx.  */
1405 COSTS_N_INSNS (1),	/* clz.  */
1406 COSTS_N_INSNS (1),	/* rev.  */
1408 true		/* non_exec_costs_exec.  */
1413 COSTS_N_INSNS (2),	/* simple.  */
1414 COSTS_N_INSNS (3),	/* flag_setting.  */
1415 COSTS_N_INSNS (2),	/* extend.  */
1416 COSTS_N_INSNS (3),	/* add.  */
1417 COSTS_N_INSNS (2),	/* extend_add.  */
1418 COSTS_N_INSNS (18)	/* idiv.  */
1422 0,			/* simple (N/A).  */
1423 0,			/* flag_setting (N/A).  */
1424 COSTS_N_INSNS (3),	/* extend.  */
1426 COSTS_N_INSNS (3),	/* extend_add.  */
1432 COSTS_N_INSNS (3),	/* load.  */
1433 COSTS_N_INSNS (3),	/* load_sign_extend.  */
1434 COSTS_N_INSNS (3),	/* ldrd.  */
1435 COSTS_N_INSNS (3),	/* ldm_1st.  */
1436 1,			/* ldm_regs_per_insn_1st.  */
1437 2,			/* ldm_regs_per_insn_subsequent.  */
1438 COSTS_N_INSNS (3),	/* loadf.  */
1439 COSTS_N_INSNS (3),	/* loadd.  */
1440 0,			/* load_unaligned.  */
1444 1,			/* stm_regs_per_insn_1st.  */
1445 2,			/* stm_regs_per_insn_subsequent.  */
1446 COSTS_N_INSNS (2),	/* storef.  */
1447 COSTS_N_INSNS (2),	/* stored.  */
1448 0,			/* store_unaligned.  */
1449 COSTS_N_INSNS (1),	/* loadv.  */
1450 COSTS_N_INSNS (1)	/* storev.  */
1455 COSTS_N_INSNS (17),	/* div.  */
1456 COSTS_N_INSNS (4),	/* mult.  */
1457 COSTS_N_INSNS (8),	/* mult_addsub. */
1458 COSTS_N_INSNS (8),	/* fma.  */
1459 COSTS_N_INSNS (4),	/* addsub.  */
1460 COSTS_N_INSNS (2),	/* fpconst.  */
1461 COSTS_N_INSNS (2),	/* neg.  */
1462 COSTS_N_INSNS (2),	/* compare.  */
1463 COSTS_N_INSNS (4),	/* widen.  */
1464 COSTS_N_INSNS (4),	/* narrow.  */
1465 COSTS_N_INSNS (4),	/* toint.  */
1466 COSTS_N_INSNS (4),	/* fromint.  */
1467 COSTS_N_INSNS (4)	/* roundint.  */
1471 COSTS_N_INSNS (31),	/* div.  */
1472 COSTS_N_INSNS (4),	/* mult.  */
1473 COSTS_N_INSNS (8),	/* mult_addsub.  */
1474 COSTS_N_INSNS (8),	/* fma.  */
1475 COSTS_N_INSNS (4),	/* addsub.  */
1476 COSTS_N_INSNS (2),	/* fpconst.  */
1477 COSTS_N_INSNS (2),	/* neg.  */
1478 COSTS_N_INSNS (2),	/* compare.  */
1479 COSTS_N_INSNS (4),	/* widen.  */
1480 COSTS_N_INSNS (4),	/* narrow.  */
1481 COSTS_N_INSNS (4),	/* toint.  */
1482 COSTS_N_INSNS (4),	/* fromint.  */
1483 COSTS_N_INSNS (4)	/* roundint.  */
1488 COSTS_N_INSNS (1)	/* alu.  */
/* NOTE(review): extraction damage — the braces and several interior field
   lines of this initializer are missing and stray line numbers are fused
   into the text.  Values left untouched; restore from upstream GCC before
   building.  */
1492 const struct cpu_cost_table cortexa15_extra_costs
=
1500 COSTS_N_INSNS (1),	/* arith_shift.  */
1501 COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1502 COSTS_N_INSNS (1),	/* log_shift.  */
1503 COSTS_N_INSNS (1),	/* log_shift_reg.  */
1505 COSTS_N_INSNS (1),	/* extend_arith.  */
1506 COSTS_N_INSNS (1),	/* bfi.  */
1511 true		/* non_exec_costs_exec.  */
1516 COSTS_N_INSNS (2),	/* simple.  */
1517 COSTS_N_INSNS (3),	/* flag_setting.  */
1518 COSTS_N_INSNS (2),	/* extend.  */
1519 COSTS_N_INSNS (2),	/* add.  */
1520 COSTS_N_INSNS (2),	/* extend_add.  */
1521 COSTS_N_INSNS (18)	/* idiv.  */
1525 0,			/* simple (N/A).  */
1526 0,			/* flag_setting (N/A).  */
1527 COSTS_N_INSNS (3),	/* extend.  */
1529 COSTS_N_INSNS (3),	/* extend_add.  */
1535 COSTS_N_INSNS (3),	/* load.  */
1536 COSTS_N_INSNS (3),	/* load_sign_extend.  */
1537 COSTS_N_INSNS (3),	/* ldrd.  */
1538 COSTS_N_INSNS (4),	/* ldm_1st.  */
1539 1,			/* ldm_regs_per_insn_1st.  */
1540 2,			/* ldm_regs_per_insn_subsequent.  */
1541 COSTS_N_INSNS (4),	/* loadf.  */
1542 COSTS_N_INSNS (4),	/* loadd.  */
1543 0,			/* load_unaligned.  */
1546 COSTS_N_INSNS (1),	/* stm_1st.  */
1547 1,			/* stm_regs_per_insn_1st.  */
1548 2,			/* stm_regs_per_insn_subsequent.  */
1551 0,			/* store_unaligned.  */
1552 COSTS_N_INSNS (1),	/* loadv.  */
1553 COSTS_N_INSNS (1)	/* storev.  */
1558 COSTS_N_INSNS (17),	/* div.  */
1559 COSTS_N_INSNS (4),	/* mult.  */
1560 COSTS_N_INSNS (8),	/* mult_addsub. */
1561 COSTS_N_INSNS (8),	/* fma.  */
1562 COSTS_N_INSNS (4),	/* addsub.  */
1563 COSTS_N_INSNS (2),	/* fpconst.  */
1564 COSTS_N_INSNS (2),	/* neg.  */
1565 COSTS_N_INSNS (5),	/* compare.  */
1566 COSTS_N_INSNS (4),	/* widen.  */
1567 COSTS_N_INSNS (4),	/* narrow.  */
1568 COSTS_N_INSNS (4),	/* toint.  */
1569 COSTS_N_INSNS (4),	/* fromint.  */
1570 COSTS_N_INSNS (4)	/* roundint.  */
1574 COSTS_N_INSNS (31),	/* div.  */
1575 COSTS_N_INSNS (4),	/* mult.  */
1576 COSTS_N_INSNS (8),	/* mult_addsub.  */
1577 COSTS_N_INSNS (8),	/* fma.  */
1578 COSTS_N_INSNS (4),	/* addsub.  */
1579 COSTS_N_INSNS (2),	/* fpconst.  */
1580 COSTS_N_INSNS (2),	/* neg.  */
1581 COSTS_N_INSNS (2),	/* compare.  */
1582 COSTS_N_INSNS (4),	/* widen.  */
1583 COSTS_N_INSNS (4),	/* narrow.  */
1584 COSTS_N_INSNS (4),	/* toint.  */
1585 COSTS_N_INSNS (4),	/* fromint.  */
1586 COSTS_N_INSNS (4)	/* roundint.  */
1591 COSTS_N_INSNS (1)	/* alu.  */
/* NOTE(review): extraction damage — the braces and several interior field
   lines of this initializer are missing and stray line numbers are fused
   into the text.  Values left untouched; restore from upstream GCC before
   building.  */
1595 const struct cpu_cost_table v7m_extra_costs
=
1603 0,			/* arith_shift.  */
1604 COSTS_N_INSNS (1),	/* arith_shift_reg.  */
1606 COSTS_N_INSNS (1),	/* log_shift_reg.  */
1608 COSTS_N_INSNS (1),	/* extend_arith.  */
1613 COSTS_N_INSNS (1),	/* non_exec.  */
1614 false		/* non_exec_costs_exec.  */
1619 COSTS_N_INSNS (1),	/* simple.  */
1620 COSTS_N_INSNS (1),	/* flag_setting.  */
1621 COSTS_N_INSNS (2),	/* extend.  */
1622 COSTS_N_INSNS (1),	/* add.  */
1623 COSTS_N_INSNS (3),	/* extend_add.  */
1624 COSTS_N_INSNS (8)	/* idiv.  */
1628 0,			/* simple (N/A).  */
1629 0,			/* flag_setting (N/A).  */
1630 COSTS_N_INSNS (2),	/* extend.  */
1632 COSTS_N_INSNS (3),	/* extend_add.  */
1638 COSTS_N_INSNS (2),	/* load.  */
1639 0,			/* load_sign_extend.  */
1640 COSTS_N_INSNS (3),	/* ldrd.  */
1641 COSTS_N_INSNS (2),	/* ldm_1st.  */
1642 1,			/* ldm_regs_per_insn_1st.  */
1643 1,			/* ldm_regs_per_insn_subsequent.  */
1644 COSTS_N_INSNS (2),	/* loadf.  */
1645 COSTS_N_INSNS (3),	/* loadd.  */
1646 COSTS_N_INSNS (1),	/* load_unaligned.  */
1647 COSTS_N_INSNS (2),	/* store.  */
1648 COSTS_N_INSNS (3),	/* strd.  */
1649 COSTS_N_INSNS (2),	/* stm_1st.  */
1650 1,			/* stm_regs_per_insn_1st.  */
1651 1,			/* stm_regs_per_insn_subsequent.  */
1652 COSTS_N_INSNS (2),	/* storef.  */
1653 COSTS_N_INSNS (3),	/* stored.  */
1654 COSTS_N_INSNS (1),	/* store_unaligned.  */
1655 COSTS_N_INSNS (1),	/* loadv.  */
1656 COSTS_N_INSNS (1)	/* storev.  */
1661 COSTS_N_INSNS (7),	/* div.  */
1662 COSTS_N_INSNS (2),	/* mult.  */
1663 COSTS_N_INSNS (5),	/* mult_addsub.  */
1664 COSTS_N_INSNS (3),	/* fma.  */
1665 COSTS_N_INSNS (1),	/* addsub.  */
1677 COSTS_N_INSNS (15),	/* div.  */
1678 COSTS_N_INSNS (5),	/* mult.  */
1679 COSTS_N_INSNS (7),	/* mult_addsub.  */
1680 COSTS_N_INSNS (7),	/* fma.  */
1681 COSTS_N_INSNS (3),	/* addsub.  */
1694 COSTS_N_INSNS (1)	/* alu.  */
1698 const struct tune_params arm_slowmul_tune
=
1700 &generic_extra_costs
, /* Insn extra costs. */
1701 NULL
, /* Sched adj cost. */
1702 arm_default_branch_cost
,
1703 &arm_default_vec_cost
,
1704 3, /* Constant limit. */
1705 5, /* Max cond insns. */
1706 8, /* Memset max inline. */
1707 1, /* Issue rate. */
1708 ARM_PREFETCH_NOT_BENEFICIAL
,
1709 tune_params::PREF_CONST_POOL_TRUE
,
1710 tune_params::PREF_LDRD_FALSE
,
1711 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1712 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1713 tune_params::DISPARAGE_FLAGS_NEITHER
,
1714 tune_params::PREF_NEON_64_FALSE
,
1715 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1716 tune_params::FUSE_NOTHING
,
1717 tune_params::SCHED_AUTOPREF_OFF
1720 const struct tune_params arm_fastmul_tune
=
1722 &generic_extra_costs
, /* Insn extra costs. */
1723 NULL
, /* Sched adj cost. */
1724 arm_default_branch_cost
,
1725 &arm_default_vec_cost
,
1726 1, /* Constant limit. */
1727 5, /* Max cond insns. */
1728 8, /* Memset max inline. */
1729 1, /* Issue rate. */
1730 ARM_PREFETCH_NOT_BENEFICIAL
,
1731 tune_params::PREF_CONST_POOL_TRUE
,
1732 tune_params::PREF_LDRD_FALSE
,
1733 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1734 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1735 tune_params::DISPARAGE_FLAGS_NEITHER
,
1736 tune_params::PREF_NEON_64_FALSE
,
1737 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1738 tune_params::FUSE_NOTHING
,
1739 tune_params::SCHED_AUTOPREF_OFF
1742 /* StrongARM has early execution of branches, so a sequence that is worth
1743 skipping is shorter. Set max_insns_skipped to a lower value. */
1745 const struct tune_params arm_strongarm_tune
=
1747 &generic_extra_costs
, /* Insn extra costs. */
1748 NULL
, /* Sched adj cost. */
1749 arm_default_branch_cost
,
1750 &arm_default_vec_cost
,
1751 1, /* Constant limit. */
1752 3, /* Max cond insns. */
1753 8, /* Memset max inline. */
1754 1, /* Issue rate. */
1755 ARM_PREFETCH_NOT_BENEFICIAL
,
1756 tune_params::PREF_CONST_POOL_TRUE
,
1757 tune_params::PREF_LDRD_FALSE
,
1758 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1759 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1760 tune_params::DISPARAGE_FLAGS_NEITHER
,
1761 tune_params::PREF_NEON_64_FALSE
,
1762 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1763 tune_params::FUSE_NOTHING
,
1764 tune_params::SCHED_AUTOPREF_OFF
1767 const struct tune_params arm_xscale_tune
=
1769 &generic_extra_costs
, /* Insn extra costs. */
1770 xscale_sched_adjust_cost
,
1771 arm_default_branch_cost
,
1772 &arm_default_vec_cost
,
1773 2, /* Constant limit. */
1774 3, /* Max cond insns. */
1775 8, /* Memset max inline. */
1776 1, /* Issue rate. */
1777 ARM_PREFETCH_NOT_BENEFICIAL
,
1778 tune_params::PREF_CONST_POOL_TRUE
,
1779 tune_params::PREF_LDRD_FALSE
,
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1781 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1782 tune_params::DISPARAGE_FLAGS_NEITHER
,
1783 tune_params::PREF_NEON_64_FALSE
,
1784 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1785 tune_params::FUSE_NOTHING
,
1786 tune_params::SCHED_AUTOPREF_OFF
1789 const struct tune_params arm_9e_tune
=
1791 &generic_extra_costs
, /* Insn extra costs. */
1792 NULL
, /* Sched adj cost. */
1793 arm_default_branch_cost
,
1794 &arm_default_vec_cost
,
1795 1, /* Constant limit. */
1796 5, /* Max cond insns. */
1797 8, /* Memset max inline. */
1798 1, /* Issue rate. */
1799 ARM_PREFETCH_NOT_BENEFICIAL
,
1800 tune_params::PREF_CONST_POOL_TRUE
,
1801 tune_params::PREF_LDRD_FALSE
,
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1803 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1804 tune_params::DISPARAGE_FLAGS_NEITHER
,
1805 tune_params::PREF_NEON_64_FALSE
,
1806 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1807 tune_params::FUSE_NOTHING
,
1808 tune_params::SCHED_AUTOPREF_OFF
1811 const struct tune_params arm_marvell_pj4_tune
=
1813 &generic_extra_costs
, /* Insn extra costs. */
1814 NULL
, /* Sched adj cost. */
1815 arm_default_branch_cost
,
1816 &arm_default_vec_cost
,
1817 1, /* Constant limit. */
1818 5, /* Max cond insns. */
1819 8, /* Memset max inline. */
1820 2, /* Issue rate. */
1821 ARM_PREFETCH_NOT_BENEFICIAL
,
1822 tune_params::PREF_CONST_POOL_TRUE
,
1823 tune_params::PREF_LDRD_FALSE
,
1824 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1825 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1826 tune_params::DISPARAGE_FLAGS_NEITHER
,
1827 tune_params::PREF_NEON_64_FALSE
,
1828 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1829 tune_params::FUSE_NOTHING
,
1830 tune_params::SCHED_AUTOPREF_OFF
1833 const struct tune_params arm_v6t2_tune
=
1835 &generic_extra_costs
, /* Insn extra costs. */
1836 NULL
, /* Sched adj cost. */
1837 arm_default_branch_cost
,
1838 &arm_default_vec_cost
,
1839 1, /* Constant limit. */
1840 5, /* Max cond insns. */
1841 8, /* Memset max inline. */
1842 1, /* Issue rate. */
1843 ARM_PREFETCH_NOT_BENEFICIAL
,
1844 tune_params::PREF_CONST_POOL_FALSE
,
1845 tune_params::PREF_LDRD_FALSE
,
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1847 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1848 tune_params::DISPARAGE_FLAGS_NEITHER
,
1849 tune_params::PREF_NEON_64_FALSE
,
1850 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1851 tune_params::FUSE_NOTHING
,
1852 tune_params::SCHED_AUTOPREF_OFF
1856 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1857 const struct tune_params arm_cortex_tune
=
1859 &generic_extra_costs
,
1860 NULL
, /* Sched adj cost. */
1861 arm_default_branch_cost
,
1862 &arm_default_vec_cost
,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 2, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL
,
1868 tune_params::PREF_CONST_POOL_FALSE
,
1869 tune_params::PREF_LDRD_FALSE
,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER
,
1873 tune_params::PREF_NEON_64_FALSE
,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1875 tune_params::FUSE_NOTHING
,
1876 tune_params::SCHED_AUTOPREF_OFF
1879 const struct tune_params arm_cortex_a8_tune
=
1881 &cortexa8_extra_costs
,
1882 NULL
, /* Sched adj cost. */
1883 arm_default_branch_cost
,
1884 &arm_default_vec_cost
,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL
,
1890 tune_params::PREF_CONST_POOL_FALSE
,
1891 tune_params::PREF_LDRD_FALSE
,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER
,
1895 tune_params::PREF_NEON_64_FALSE
,
1896 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1897 tune_params::FUSE_NOTHING
,
1898 tune_params::SCHED_AUTOPREF_OFF
1901 const struct tune_params arm_cortex_a7_tune
=
1903 &cortexa7_extra_costs
,
1904 NULL
, /* Sched adj cost. */
1905 arm_default_branch_cost
,
1906 &arm_default_vec_cost
,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 2, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL
,
1912 tune_params::PREF_CONST_POOL_FALSE
,
1913 tune_params::PREF_LDRD_FALSE
,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER
,
1917 tune_params::PREF_NEON_64_FALSE
,
1918 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1919 tune_params::FUSE_NOTHING
,
1920 tune_params::SCHED_AUTOPREF_OFF
1923 const struct tune_params arm_cortex_a15_tune
=
1925 &cortexa15_extra_costs
,
1926 NULL
, /* Sched adj cost. */
1927 arm_default_branch_cost
,
1928 &arm_default_vec_cost
,
1929 1, /* Constant limit. */
1930 2, /* Max cond insns. */
1931 8, /* Memset max inline. */
1932 3, /* Issue rate. */
1933 ARM_PREFETCH_NOT_BENEFICIAL
,
1934 tune_params::PREF_CONST_POOL_FALSE
,
1935 tune_params::PREF_LDRD_TRUE
,
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1937 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1938 tune_params::DISPARAGE_FLAGS_ALL
,
1939 tune_params::PREF_NEON_64_FALSE
,
1940 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1941 tune_params::FUSE_NOTHING
,
1942 tune_params::SCHED_AUTOPREF_FULL
1945 const struct tune_params arm_cortex_a35_tune
=
1947 &cortexa53_extra_costs
,
1948 NULL
, /* Sched adj cost. */
1949 arm_default_branch_cost
,
1950 &arm_default_vec_cost
,
1951 1, /* Constant limit. */
1952 5, /* Max cond insns. */
1953 8, /* Memset max inline. */
1954 1, /* Issue rate. */
1955 ARM_PREFETCH_NOT_BENEFICIAL
,
1956 tune_params::PREF_CONST_POOL_FALSE
,
1957 tune_params::PREF_LDRD_FALSE
,
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1960 tune_params::DISPARAGE_FLAGS_NEITHER
,
1961 tune_params::PREF_NEON_64_FALSE
,
1962 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1963 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
1964 tune_params::SCHED_AUTOPREF_OFF
1967 const struct tune_params arm_cortex_a53_tune
=
1969 &cortexa53_extra_costs
,
1970 NULL
, /* Sched adj cost. */
1971 arm_default_branch_cost
,
1972 &arm_default_vec_cost
,
1973 1, /* Constant limit. */
1974 5, /* Max cond insns. */
1975 8, /* Memset max inline. */
1976 2, /* Issue rate. */
1977 ARM_PREFETCH_NOT_BENEFICIAL
,
1978 tune_params::PREF_CONST_POOL_FALSE
,
1979 tune_params::PREF_LDRD_FALSE
,
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1982 tune_params::DISPARAGE_FLAGS_NEITHER
,
1983 tune_params::PREF_NEON_64_FALSE
,
1984 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1985 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
1986 tune_params::SCHED_AUTOPREF_OFF
1989 const struct tune_params arm_cortex_a57_tune
=
1991 &cortexa57_extra_costs
,
1992 NULL
, /* Sched adj cost. */
1993 arm_default_branch_cost
,
1994 &arm_default_vec_cost
,
1995 1, /* Constant limit. */
1996 2, /* Max cond insns. */
1997 8, /* Memset max inline. */
1998 3, /* Issue rate. */
1999 ARM_PREFETCH_NOT_BENEFICIAL
,
2000 tune_params::PREF_CONST_POOL_FALSE
,
2001 tune_params::PREF_LDRD_TRUE
,
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2004 tune_params::DISPARAGE_FLAGS_ALL
,
2005 tune_params::PREF_NEON_64_FALSE
,
2006 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2007 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2008 tune_params::SCHED_AUTOPREF_FULL
2011 const struct tune_params arm_exynosm1_tune
=
2013 &exynosm1_extra_costs
,
2014 NULL
, /* Sched adj cost. */
2015 arm_default_branch_cost
,
2016 &arm_default_vec_cost
,
2017 1, /* Constant limit. */
2018 2, /* Max cond insns. */
2019 8, /* Memset max inline. */
2020 3, /* Issue rate. */
2021 ARM_PREFETCH_NOT_BENEFICIAL
,
2022 tune_params::PREF_CONST_POOL_FALSE
,
2023 tune_params::PREF_LDRD_TRUE
,
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2025 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2026 tune_params::DISPARAGE_FLAGS_ALL
,
2027 tune_params::PREF_NEON_64_FALSE
,
2028 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2029 tune_params::FUSE_NOTHING
,
2030 tune_params::SCHED_AUTOPREF_OFF
2033 const struct tune_params arm_xgene1_tune
=
2035 &xgene1_extra_costs
,
2036 NULL
, /* Sched adj cost. */
2037 arm_default_branch_cost
,
2038 &arm_default_vec_cost
,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 32, /* Memset max inline. */
2042 4, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL
,
2044 tune_params::PREF_CONST_POOL_FALSE
,
2045 tune_params::PREF_LDRD_TRUE
,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL
,
2049 tune_params::PREF_NEON_64_FALSE
,
2050 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2051 tune_params::FUSE_NOTHING
,
2052 tune_params::SCHED_AUTOPREF_OFF
2055 const struct tune_params arm_qdf24xx_tune
=
2057 &qdf24xx_extra_costs
,
2058 NULL
, /* Scheduler cost adjustment. */
2059 arm_default_branch_cost
,
2060 &arm_default_vec_cost
, /* Vectorizer costs. */
2061 1, /* Constant limit. */
2062 2, /* Max cond insns. */
2063 8, /* Memset max inline. */
2064 4, /* Issue rate. */
2065 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2066 tune_params::PREF_CONST_POOL_FALSE
,
2067 tune_params::PREF_LDRD_TRUE
,
2068 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2070 tune_params::DISPARAGE_FLAGS_ALL
,
2071 tune_params::PREF_NEON_64_FALSE
,
2072 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2073 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2074 tune_params::SCHED_AUTOPREF_FULL
2077 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2078 less appealing. Set max_insns_skipped to a low value. */
2080 const struct tune_params arm_cortex_a5_tune
=
2082 &cortexa5_extra_costs
,
2083 NULL
, /* Sched adj cost. */
2084 arm_cortex_a5_branch_cost
,
2085 &arm_default_vec_cost
,
2086 1, /* Constant limit. */
2087 1, /* Max cond insns. */
2088 8, /* Memset max inline. */
2089 2, /* Issue rate. */
2090 ARM_PREFETCH_NOT_BENEFICIAL
,
2091 tune_params::PREF_CONST_POOL_FALSE
,
2092 tune_params::PREF_LDRD_FALSE
,
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2094 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2095 tune_params::DISPARAGE_FLAGS_NEITHER
,
2096 tune_params::PREF_NEON_64_FALSE
,
2097 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2098 tune_params::FUSE_NOTHING
,
2099 tune_params::SCHED_AUTOPREF_OFF
2102 const struct tune_params arm_cortex_a9_tune
=
2104 &cortexa9_extra_costs
,
2105 cortex_a9_sched_adjust_cost
,
2106 arm_default_branch_cost
,
2107 &arm_default_vec_cost
,
2108 1, /* Constant limit. */
2109 5, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 2, /* Issue rate. */
2112 ARM_PREFETCH_BENEFICIAL(4,32,32),
2113 tune_params::PREF_CONST_POOL_FALSE
,
2114 tune_params::PREF_LDRD_FALSE
,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_NEITHER
,
2118 tune_params::PREF_NEON_64_FALSE
,
2119 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2120 tune_params::FUSE_NOTHING
,
2121 tune_params::SCHED_AUTOPREF_OFF
2124 const struct tune_params arm_cortex_a12_tune
=
2126 &cortexa12_extra_costs
,
2127 NULL
, /* Sched adj cost. */
2128 arm_default_branch_cost
,
2129 &arm_default_vec_cost
, /* Vectorizer costs. */
2130 1, /* Constant limit. */
2131 2, /* Max cond insns. */
2132 8, /* Memset max inline. */
2133 2, /* Issue rate. */
2134 ARM_PREFETCH_NOT_BENEFICIAL
,
2135 tune_params::PREF_CONST_POOL_FALSE
,
2136 tune_params::PREF_LDRD_TRUE
,
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2138 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2139 tune_params::DISPARAGE_FLAGS_ALL
,
2140 tune_params::PREF_NEON_64_FALSE
,
2141 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2142 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2143 tune_params::SCHED_AUTOPREF_OFF
2146 const struct tune_params arm_cortex_a73_tune
=
2148 &cortexa57_extra_costs
,
2149 NULL
, /* Sched adj cost. */
2150 arm_default_branch_cost
,
2151 &arm_default_vec_cost
, /* Vectorizer costs. */
2152 1, /* Constant limit. */
2153 2, /* Max cond insns. */
2154 8, /* Memset max inline. */
2155 2, /* Issue rate. */
2156 ARM_PREFETCH_NOT_BENEFICIAL
,
2157 tune_params::PREF_CONST_POOL_FALSE
,
2158 tune_params::PREF_LDRD_TRUE
,
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2160 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2161 tune_params::DISPARAGE_FLAGS_ALL
,
2162 tune_params::PREF_NEON_64_FALSE
,
2163 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2164 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2165 tune_params::SCHED_AUTOPREF_FULL
2168 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2169 cycle to execute each. An LDR from the constant pool also takes two cycles
2170 to execute, but mildly increases pipelining opportunity (consecutive
2171 loads/stores can be pipelined together, saving one cycle), and may also
2172 improve icache utilisation. Hence we prefer the constant pool for such
2175 const struct tune_params arm_v7m_tune
=
2178 NULL
, /* Sched adj cost. */
2179 arm_cortex_m_branch_cost
,
2180 &arm_default_vec_cost
,
2181 1, /* Constant limit. */
2182 2, /* Max cond insns. */
2183 8, /* Memset max inline. */
2184 1, /* Issue rate. */
2185 ARM_PREFETCH_NOT_BENEFICIAL
,
2186 tune_params::PREF_CONST_POOL_TRUE
,
2187 tune_params::PREF_LDRD_FALSE
,
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2189 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2190 tune_params::DISPARAGE_FLAGS_NEITHER
,
2191 tune_params::PREF_NEON_64_FALSE
,
2192 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2193 tune_params::FUSE_NOTHING
,
2194 tune_params::SCHED_AUTOPREF_OFF
2197 /* Cortex-M7 tuning. */
2199 const struct tune_params arm_cortex_m7_tune
=
2202 NULL
, /* Sched adj cost. */
2203 arm_cortex_m7_branch_cost
,
2204 &arm_default_vec_cost
,
2205 0, /* Constant limit. */
2206 1, /* Max cond insns. */
2207 8, /* Memset max inline. */
2208 2, /* Issue rate. */
2209 ARM_PREFETCH_NOT_BENEFICIAL
,
2210 tune_params::PREF_CONST_POOL_TRUE
,
2211 tune_params::PREF_LDRD_FALSE
,
2212 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2213 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2214 tune_params::DISPARAGE_FLAGS_NEITHER
,
2215 tune_params::PREF_NEON_64_FALSE
,
2216 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2217 tune_params::FUSE_NOTHING
,
2218 tune_params::SCHED_AUTOPREF_OFF
2221 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2222 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2224 const struct tune_params arm_v6m_tune
=
2226 &generic_extra_costs
, /* Insn extra costs. */
2227 NULL
, /* Sched adj cost. */
2228 arm_default_branch_cost
,
2229 &arm_default_vec_cost
, /* Vectorizer costs. */
2230 1, /* Constant limit. */
2231 5, /* Max cond insns. */
2232 8, /* Memset max inline. */
2233 1, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL
,
2235 tune_params::PREF_CONST_POOL_FALSE
,
2236 tune_params::PREF_LDRD_FALSE
,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_NEITHER
,
2240 tune_params::PREF_NEON_64_FALSE
,
2241 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2242 tune_params::FUSE_NOTHING
,
2243 tune_params::SCHED_AUTOPREF_OFF
2246 const struct tune_params arm_fa726te_tune
=
2248 &generic_extra_costs
, /* Insn extra costs. */
2249 fa726te_sched_adjust_cost
,
2250 arm_default_branch_cost
,
2251 &arm_default_vec_cost
,
2252 1, /* Constant limit. */
2253 5, /* Max cond insns. */
2254 8, /* Memset max inline. */
2255 2, /* Issue rate. */
2256 ARM_PREFETCH_NOT_BENEFICIAL
,
2257 tune_params::PREF_CONST_POOL_TRUE
,
2258 tune_params::PREF_LDRD_FALSE
,
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2260 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2261 tune_params::DISPARAGE_FLAGS_NEITHER
,
2262 tune_params::PREF_NEON_64_FALSE
,
2263 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2264 tune_params::FUSE_NOTHING
,
2265 tune_params::SCHED_AUTOPREF_OFF
2269 /* Not all of these give usefully different compilation alternatives,
2270 but there is no simple way of generalizing them. */
2271 static const struct processors all_cores
[] =
2274 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2275 {NAME, TARGET_CPU_##IDENT, #ARCH, BASE_ARCH_##ARCH, \
2276 FLAGS, &arm_##COSTS##_tune},
2277 #include "arm-cores.def"
2279 {NULL
, TARGET_CPU_arm_none
, NULL
, BASE_ARCH_0
, ARM_FSET_EMPTY
, NULL
}
2282 static const struct processors all_architectures
[] =
2284 /* ARM Architectures */
2285 /* We don't specify tuning costs here as it will be figured out
2288 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2289 {NAME, TARGET_CPU_##CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2290 #include "arm-arches.def"
2292 {NULL
, TARGET_CPU_arm_none
, NULL
, BASE_ARCH_0
, ARM_FSET_EMPTY
, NULL
}
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2308 /* Available values for -mfpu=. */
2310 const struct arm_fpu_desc all_fpus
[] =
2312 #define ARM_FPU(NAME, REV, VFP_REGS, FEATURES) \
2313 { NAME, REV, VFP_REGS, FEATURES },
2314 #include "arm-fpus.def"
2318 /* Supported TLS relocations. */
2326 TLS_DESCSEQ
/* GNU scheme */
2329 /* The maximum number of insns to be used when loading a constant. */
2331 arm_constant_limit (bool size_p
)
2333 return size_p
? 1 : current_tune
->constant_limit
;
2336 /* Emit an insn that's a simple single-set. Both the operands must be known
2338 inline static rtx_insn
*
2339 emit_set_insn (rtx x
, rtx y
)
2341 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE.  Uses Kernighan's trick:
   each iteration clears the least-significant set bit, so the loop
   runs once per set bit.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2359 /* Return the number of features in feature-set SET. */
2361 feature_count (const arm_feature_set
* set
)
2363 return (bit_count (ARM_FSET_CPU1 (*set
))
2364 + bit_count (ARM_FSET_CPU2 (*set
)));
2371 } arm_fixed_mode_set
;
2373 /* A small helper for setting fixed-point library libfuncs. */
2376 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2377 const char *funcname
, const char *modename
,
2382 if (num_suffix
== 0)
2383 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2385 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2387 set_optab_libfunc (optable
, mode
, buffer
);
2391 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2392 machine_mode from
, const char *funcname
,
2393 const char *toname
, const char *fromname
)
2396 const char *maybe_suffix_2
= "";
2398 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2399 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2400 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2401 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2402 maybe_suffix_2
= "2";
2404 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2407 set_conv_libfunc (optable
, to
, from
, buffer
);
2410 /* Set up library functions unique to ARM. */
2413 arm_init_libfuncs (void)
2415 /* For Linux, we have access to kernel support for atomic operations. */
2416 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2417 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2419 /* There are no special library functions unless we are using the
2424 /* The functions below are described in Section 4 of the "Run-Time
2425 ABI for the ARM architecture", Version 1.0. */
2427 /* Double-precision floating-point arithmetic. Table 2. */
2428 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2429 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2430 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2431 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2432 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2434 /* Double-precision comparisons. Table 3. */
2435 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2436 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2437 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2438 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2439 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2440 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2441 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2443 /* Single-precision floating-point arithmetic. Table 4. */
2444 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2445 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2446 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2447 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2448 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2450 /* Single-precision comparisons. Table 5. */
2451 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2452 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2453 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2454 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2455 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2456 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2457 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2459 /* Floating-point to integer conversions. Table 6. */
2460 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2461 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2462 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2463 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2464 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2465 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2466 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2467 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2469 /* Conversions between floating types. Table 7. */
2470 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2471 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2473 /* Integer to floating-point conversions. Table 8. */
2474 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2475 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2476 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2477 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2478 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2479 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2480 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2481 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2483 /* Long long. Table 9. */
2484 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2485 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2486 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2487 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2488 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2489 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2490 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2491 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2493 /* Integer (32/32->32) division. \S 4.3.1. */
2494 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2495 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2497 /* The divmod functions are designed so that they can be used for
2498 plain division, even though they return both the quotient and the
2499 remainder. The quotient is returned in the usual location (i.e.,
2500 r0 for SImode, {r0, r1} for DImode), just as would be expected
2501 for an ordinary division routine. Because the AAPCS calling
2502 conventions specify that all of { r0, r1, r2, r3 } are
2503 callee-saved registers, there is no need to tell the compiler
2504 explicitly that those registers are clobbered by these
2506 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2507 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2509 /* For SImode division the ABI provides div-without-mod routines,
2510 which are faster. */
2511 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2512 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2514 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2515 divmod libcalls instead. */
2516 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2517 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2518 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2519 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2521 /* Half-precision float operations. The compiler handles all operations
2522 with NULL libfuncs by converting the SFmode. */
2523 switch (arm_fp16_format
)
2525 case ARM_FP16_FORMAT_IEEE
:
2526 case ARM_FP16_FORMAT_ALTERNATIVE
:
2529 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2530 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2532 : "__gnu_f2h_alternative"));
2533 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2534 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2536 : "__gnu_h2f_alternative"));
2539 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2540 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2541 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2542 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2543 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2546 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2547 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2548 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2549 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2550 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2551 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2552 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2559 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2561 const arm_fixed_mode_set fixed_arith_modes
[] =
2582 const arm_fixed_mode_set fixed_conv_modes
[] =
2612 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2614 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2615 "add", fixed_arith_modes
[i
].name
, 3);
2616 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2617 "ssadd", fixed_arith_modes
[i
].name
, 3);
2618 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2619 "usadd", fixed_arith_modes
[i
].name
, 3);
2620 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2621 "sub", fixed_arith_modes
[i
].name
, 3);
2622 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2623 "sssub", fixed_arith_modes
[i
].name
, 3);
2624 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2625 "ussub", fixed_arith_modes
[i
].name
, 3);
2626 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2627 "mul", fixed_arith_modes
[i
].name
, 3);
2628 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2629 "ssmul", fixed_arith_modes
[i
].name
, 3);
2630 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2631 "usmul", fixed_arith_modes
[i
].name
, 3);
2632 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2633 "div", fixed_arith_modes
[i
].name
, 3);
2634 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2635 "udiv", fixed_arith_modes
[i
].name
, 3);
2636 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2637 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2638 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2639 "usdiv", fixed_arith_modes
[i
].name
, 3);
2640 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2641 "neg", fixed_arith_modes
[i
].name
, 2);
2642 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2643 "ssneg", fixed_arith_modes
[i
].name
, 2);
2644 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2645 "usneg", fixed_arith_modes
[i
].name
, 2);
2646 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2647 "ashl", fixed_arith_modes
[i
].name
, 3);
2648 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2649 "ashr", fixed_arith_modes
[i
].name
, 3);
2650 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2651 "lshr", fixed_arith_modes
[i
].name
, 3);
2652 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2653 "ssashl", fixed_arith_modes
[i
].name
, 3);
2654 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2655 "usashl", fixed_arith_modes
[i
].name
, 3);
2656 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2657 "cmp", fixed_arith_modes
[i
].name
, 2);
2660 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2661 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2664 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2665 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2668 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2669 fixed_conv_modes
[j
].mode
, "fract",
2670 fixed_conv_modes
[i
].name
,
2671 fixed_conv_modes
[j
].name
);
2672 arm_set_fixed_conv_libfunc (satfract_optab
,
2673 fixed_conv_modes
[i
].mode
,
2674 fixed_conv_modes
[j
].mode
, "satfract",
2675 fixed_conv_modes
[i
].name
,
2676 fixed_conv_modes
[j
].name
);
2677 arm_set_fixed_conv_libfunc (fractuns_optab
,
2678 fixed_conv_modes
[i
].mode
,
2679 fixed_conv_modes
[j
].mode
, "fractuns",
2680 fixed_conv_modes
[i
].name
,
2681 fixed_conv_modes
[j
].name
);
2682 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2683 fixed_conv_modes
[i
].mode
,
2684 fixed_conv_modes
[j
].mode
, "satfractuns",
2685 fixed_conv_modes
[i
].name
,
2686 fixed_conv_modes
[j
].name
);
2690 if (TARGET_AAPCS_BASED
)
2691 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2694 /* On AAPCS systems, this is the "struct __va_list". */
2695 static GTY(()) tree va_list_type
;
2697 /* Return the type to use as __builtin_va_list. */
2699 arm_build_builtin_va_list (void)
2704 if (!TARGET_AAPCS_BASED
)
2705 return std_build_builtin_va_list ();
2707 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2715 The C Library ABI further reinforces this definition in \S
2718 We must follow this definition exactly. The structure tag
2719 name is visible in C++ mangled names, and thus forms a part
2720 of the ABI. The field name may be used by people who
2721 #include <stdarg.h>. */
2722 /* Create the type. */
2723 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2724 /* Give it the required name. */
2725 va_list_name
= build_decl (BUILTINS_LOCATION
,
2727 get_identifier ("__va_list"),
2729 DECL_ARTIFICIAL (va_list_name
) = 1;
2730 TYPE_NAME (va_list_type
) = va_list_name
;
2731 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2732 /* Create the __ap field. */
2733 ap_field
= build_decl (BUILTINS_LOCATION
,
2735 get_identifier ("__ap"),
2737 DECL_ARTIFICIAL (ap_field
) = 1;
2738 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2739 TYPE_FIELDS (va_list_type
) = ap_field
;
2740 /* Compute its layout. */
2741 layout_type (va_list_type
);
2743 return va_list_type
;
2746 /* Return an expression of type "void *" pointing to the next
2747 available argument in a variable-argument list. VALIST is the
2748 user-level va_list object, of type __builtin_va_list. */
2750 arm_extract_valist_ptr (tree valist
)
2752 if (TREE_TYPE (valist
) == error_mark_node
)
2753 return error_mark_node
;
2755 /* On an AAPCS target, the pointer is stored within "struct
2757 if (TARGET_AAPCS_BASED
)
2759 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2760 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2761 valist
, ap_field
, NULL_TREE
);
2767 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2769 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2771 valist
= arm_extract_valist_ptr (valist
);
2772 std_expand_builtin_va_start (valist
, nextarg
);
2775 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2777 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2780 valist
= arm_extract_valist_ptr (valist
);
2781 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2784 /* Check any incompatible options that the user has specified. */
2786 arm_option_check_internal (struct gcc_options
*opts
)
2788 int flags
= opts
->x_target_flags
;
2789 const struct arm_fpu_desc
*fpu_desc
= &all_fpus
[opts
->x_arm_fpu_index
];
2791 /* iWMMXt and NEON are incompatible. */
2793 && ARM_FPU_FSET_HAS (fpu_desc
->features
, FPU_FL_NEON
))
2794 error ("iWMMXt and NEON are incompatible");
2796 /* Make sure that the processor choice does not conflict with any of the
2797 other command line choices. */
2798 if (TARGET_ARM_P (flags
) && !ARM_FSET_HAS_CPU1 (insn_flags
, FL_NOTM
))
2799 error ("target CPU does not support ARM mode");
2801 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2802 from here where no function is being compiled currently. */
2803 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2804 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2806 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2807 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2809 /* If this target is normally configured to use APCS frames, warn if they
2810 are turned off and debugging is turned on. */
2811 if (TARGET_ARM_P (flags
)
2812 && write_symbols
!= NO_DEBUG
2813 && !TARGET_APCS_FRAME
2814 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2815 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2817 /* iWMMXt unsupported under Thumb mode. */
2818 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2819 error ("iWMMXt unsupported under Thumb mode");
2821 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2822 error ("can not use -mtp=cp15 with 16-bit Thumb");
2824 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2826 error ("RTP PIC is incompatible with Thumb");
2830 /* We only support -mslow-flash-data on armv7-m targets. */
2831 if (target_slow_flash_data
2832 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2833 || (TARGET_THUMB1_P (flags
) || flag_pic
|| TARGET_NEON
)))
2834 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2836 /* We only support pure-code on Thumb-2 M-profile targets. */
2837 if (target_pure_code
2838 && (!arm_arch_thumb2
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
))
2839 error ("-mpure-code only supports non-pic code on armv7-m targets");
2843 /* Recompute the global settings depending on target attribute options. */
2846 arm_option_params_internal (void)
2848 /* If we are not using the default (ARM mode) section anchor offset
2849 ranges, then set the correct ranges now. */
2852 /* Thumb-1 LDR instructions cannot have negative offsets.
2853 Permissible positive offset ranges are 5-bit (for byte loads),
2854 6-bit (for halfword loads), or 7-bit (for word loads).
2855 Empirical results suggest a 7-bit anchor range gives the best
2856 overall code size. */
2857 targetm
.min_anchor_offset
= 0;
2858 targetm
.max_anchor_offset
= 127;
2860 else if (TARGET_THUMB2
)
2862 /* The minimum is set such that the total size of the block
2863 for a particular anchor is 248 + 1 + 4095 bytes, which is
2864 divisible by eight, ensuring natural spacing of anchors. */
2865 targetm
.min_anchor_offset
= -248;
2866 targetm
.max_anchor_offset
= 4095;
2870 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2871 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2876 /* If optimizing for size, bump the number of instructions that we
2877 are prepared to conditionally execute (even on a StrongARM). */
2878 max_insns_skipped
= 6;
2880 /* For THUMB2, we limit the conditional sequence to one IT block. */
2882 max_insns_skipped
= arm_restrict_it
? 1 : 4;
2885 /* When -mrestrict-it is in use tone down the if-conversion. */
2886 max_insns_skipped
= (TARGET_THUMB2
&& arm_restrict_it
)
2887 ? 1 : current_tune
->max_insns_skipped
;
2890 /* True if -mflip-thumb should next add an attribute for the default
2891 mode, false if it should next add an attribute for the opposite mode. */
2892 static GTY(()) bool thumb_flipper
;
2894 /* Options after initial target override. */
2895 static GTY(()) tree init_optimize
;
2898 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2900 if (opts
->x_align_functions
<= 0)
2901 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2902 && opts
->x_optimize_size
? 2 : 4;
2905 /* Implement targetm.override_options_after_change. */
2908 arm_override_options_after_change (void)
2910 arm_override_options_after_change_1 (&global_options
);
2913 /* Reset options between modes that the user has specified. */
2915 arm_option_override_internal (struct gcc_options
*opts
,
2916 struct gcc_options
*opts_set
)
2918 arm_override_options_after_change_1 (opts
);
2920 if (TARGET_INTERWORK
&& !ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
))
2922 /* The default is to enable interworking, so this warning message would
2923 be confusing to users who have just compiled with, eg, -march=armv3. */
2924 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2925 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2928 if (TARGET_THUMB_P (opts
->x_target_flags
)
2929 && !(ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
)))
2931 warning (0, "target CPU does not support THUMB instructions");
2932 opts
->x_target_flags
&= ~MASK_THUMB
;
2935 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2937 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2938 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2941 /* Callee super interworking implies thumb interworking. Adding
2942 this to the flags here simplifies the logic elsewhere. */
2943 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2944 opts
->x_target_flags
|= MASK_INTERWORK
;
2946 /* need to remember initial values so combinaisons of options like
2947 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2948 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2950 if (! opts_set
->x_arm_restrict_it
)
2951 opts
->x_arm_restrict_it
= arm_arch8
;
2953 /* ARM execution state and M profile don't have [restrict] IT. */
2954 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
2955 opts
->x_arm_restrict_it
= 0;
2957 /* Enable -munaligned-access by default for
2958 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2959 i.e. Thumb2 and ARM state only.
2960 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2961 - ARMv8 architecture-base processors.
2963 Disable -munaligned-access by default for
2964 - all pre-ARMv6 architecture-based processors
2965 - ARMv6-M architecture-based processors
2966 - ARMv8-M Baseline processors. */
2968 if (! opts_set
->x_unaligned_access
)
2970 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
2971 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
2973 else if (opts
->x_unaligned_access
== 1
2974 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2976 warning (0, "target CPU does not support unaligned accesses");
2977 opts
->x_unaligned_access
= 0;
2980 /* Don't warn since it's on by default in -O2. */
2981 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2982 opts
->x_flag_schedule_insns
= 0;
2984 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
2986 /* Disable shrink-wrap when optimizing function for size, since it tends to
2987 generate additional returns. */
2988 if (optimize_function_for_size_p (cfun
)
2989 && TARGET_THUMB2_P (opts
->x_target_flags
))
2990 opts
->x_flag_shrink_wrap
= false;
2992 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
2994 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2995 - epilogue_insns - does not accurately model the corresponding insns
2996 emitted in the asm file. In particular, see the comment in thumb_exit
2997 'Find out how many of the (return) argument registers we can corrupt'.
2998 As a consequence, the epilogue may clobber registers without fipa-ra
2999 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3000 TODO: Accurately model clobbers for epilogue_insns and reenable
3002 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3003 opts
->x_flag_ipa_ra
= 0;
3005 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3007 /* Thumb2 inline assembly code should always use unified syntax.
3008 This will apply to ARM and Thumb1 eventually. */
3009 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3011 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3012 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3016 /* Fix up any incompatible options that the user has specified. */
3018 arm_option_override (void)
3020 arm_selected_arch
= NULL
;
3021 arm_selected_cpu
= NULL
;
3022 arm_selected_tune
= NULL
;
3024 if (global_options_set
.x_arm_arch_option
)
3025 arm_selected_arch
= &all_architectures
[arm_arch_option
];
3027 if (global_options_set
.x_arm_cpu_option
)
3029 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
3030 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
3033 if (global_options_set
.x_arm_tune_option
)
3034 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
3036 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3037 SUBTARGET_OVERRIDE_OPTIONS
;
3040 if (arm_selected_arch
)
3042 if (arm_selected_cpu
)
3044 const arm_feature_set tuning_flags
= ARM_FSET_MAKE_CPU1 (FL_TUNE
);
3045 arm_feature_set selected_flags
;
3046 ARM_FSET_XOR (selected_flags
, arm_selected_cpu
->flags
,
3047 arm_selected_arch
->flags
);
3048 ARM_FSET_EXCLUDE (selected_flags
, selected_flags
, tuning_flags
);
3049 /* Check for conflict between mcpu and march. */
3050 if (!ARM_FSET_IS_EMPTY (selected_flags
))
3052 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3053 arm_selected_cpu
->name
, arm_selected_arch
->name
);
3054 /* -march wins for code generation.
3055 -mcpu wins for default tuning. */
3056 if (!arm_selected_tune
)
3057 arm_selected_tune
= arm_selected_cpu
;
3059 arm_selected_cpu
= arm_selected_arch
;
3063 arm_selected_arch
= NULL
;
3066 /* Pick a CPU based on the architecture. */
3067 arm_selected_cpu
= arm_selected_arch
;
3070 /* If the user did not specify a processor, choose one for them. */
3071 if (!arm_selected_cpu
)
3073 const struct processors
* sel
;
3074 arm_feature_set sought
= ARM_FSET_EMPTY
;;
3076 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
3077 gcc_assert (arm_selected_cpu
->name
);
3079 sel
= arm_selected_cpu
;
3080 insn_flags
= sel
->flags
;
3082 /* Now check to see if the user has specified some command line
3083 switch that require certain abilities from the cpu. */
3085 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3087 ARM_FSET_ADD_CPU1 (sought
, FL_THUMB
);
3088 ARM_FSET_ADD_CPU1 (sought
, FL_MODE32
);
3090 /* There are no ARM processors that support both APCS-26 and
3091 interworking. Therefore we force FL_MODE26 to be removed
3092 from insn_flags here (if it was set), so that the search
3093 below will always be able to find a compatible processor. */
3094 ARM_FSET_DEL_CPU1 (insn_flags
, FL_MODE26
);
3097 if (!ARM_FSET_IS_EMPTY (sought
)
3098 && !(ARM_FSET_CPU_SUBSET (sought
, insn_flags
)))
3100 /* Try to locate a CPU type that supports all of the abilities
3101 of the default CPU, plus the extra abilities requested by
3103 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3104 if (ARM_FSET_CPU_SUBSET (sought
, sel
->flags
))
3107 if (sel
->name
== NULL
)
3109 unsigned current_bit_count
= 0;
3110 const struct processors
* best_fit
= NULL
;
3112 /* Ideally we would like to issue an error message here
3113 saying that it was not possible to find a CPU compatible
3114 with the default CPU, but which also supports the command
3115 line options specified by the programmer, and so they
3116 ought to use the -mcpu=<name> command line option to
3117 override the default CPU type.
3119 If we cannot find a cpu that has both the
3120 characteristics of the default cpu and the given
3121 command line options we scan the array again looking
3122 for a best match. */
3123 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3125 arm_feature_set required
= ARM_FSET_EMPTY
;
3126 ARM_FSET_UNION (required
, sought
, insn_flags
);
3127 if (ARM_FSET_CPU_SUBSET (required
, sel
->flags
))
3130 arm_feature_set flags
;
3131 ARM_FSET_INTER (flags
, sel
->flags
, insn_flags
);
3132 count
= feature_count (&flags
);
3134 if (count
>= current_bit_count
)
3137 current_bit_count
= count
;
3141 gcc_assert (best_fit
);
3145 arm_selected_cpu
= sel
;
3149 gcc_assert (arm_selected_cpu
);
3150 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3151 if (!arm_selected_tune
)
3152 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
3154 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
3155 insn_flags
= arm_selected_cpu
->flags
;
3156 arm_base_arch
= arm_selected_cpu
->base_arch
;
3158 arm_tune
= arm_selected_tune
->core
;
3159 tune_flags
= arm_selected_tune
->flags
;
3160 current_tune
= arm_selected_tune
->tune
;
3162 /* TBD: Dwarf info for apcs frame is not handled yet. */
3163 if (TARGET_APCS_FRAME
)
3164 flag_shrink_wrap
= false;
3166 /* BPABI targets use linker tricks to allow interworking on cores
3167 without thumb support. */
3168 if (TARGET_INTERWORK
3169 && !(ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
) || TARGET_BPABI
))
3171 warning (0, "target CPU does not support interworking" );
3172 target_flags
&= ~MASK_INTERWORK
;
3175 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3177 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3178 target_flags
|= MASK_APCS_FRAME
;
3181 if (TARGET_POKE_FUNCTION_NAME
)
3182 target_flags
|= MASK_APCS_FRAME
;
3184 if (TARGET_APCS_REENT
&& flag_pic
)
3185 error ("-fpic and -mapcs-reent are incompatible");
3187 if (TARGET_APCS_REENT
)
3188 warning (0, "APCS reentrant code not supported. Ignored");
3190 if (TARGET_APCS_FLOAT
)
3191 warning (0, "passing floating point arguments in fp regs not yet supported");
3193 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3194 arm_arch3m
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH3M
);
3195 arm_arch4
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH4
);
3196 arm_arch4t
= arm_arch4
&& (ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
));
3197 arm_arch5
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH5
);
3198 arm_arch5e
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH5E
);
3199 arm_arch6
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6
);
3200 arm_arch6k
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6K
);
3201 arm_arch6kz
= arm_arch6k
&& ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6KZ
);
3202 arm_arch_notm
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_NOTM
);
3203 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3204 arm_arch7
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH7
);
3205 arm_arch7em
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH7EM
);
3206 arm_arch8
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH8
);
3207 arm_arch8_1
= ARM_FSET_HAS_CPU2 (insn_flags
, FL2_ARCH8_1
);
3208 arm_arch8_2
= ARM_FSET_HAS_CPU2 (insn_flags
, FL2_ARCH8_2
);
3209 arm_arch_thumb1
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
);
3210 arm_arch_thumb2
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB2
);
3211 arm_arch_xscale
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_XSCALE
);
3213 arm_ld_sched
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_LDSCHED
);
3214 arm_tune_strongarm
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_STRONG
);
3215 arm_tune_wbuf
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_WBUF
);
3216 arm_tune_xscale
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_XSCALE
);
3217 arm_arch_iwmmxt
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_IWMMXT
);
3218 arm_arch_iwmmxt2
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_IWMMXT2
);
3219 arm_arch_thumb_hwdiv
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB_DIV
);
3220 arm_arch_arm_hwdiv
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARM_DIV
);
3221 arm_arch_no_volatile_ce
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_NO_VOLATILE_CE
);
3222 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3223 arm_arch_crc
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_CRC32
);
3224 arm_m_profile_small_mul
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_SMALLMUL
);
3225 arm_fp16_inst
= ARM_FSET_HAS_CPU2 (insn_flags
, FL2_FP16INST
);
3228 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3229 error ("selected fp16 options are incompatible.");
3230 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3233 /* V5 code we generate is completely interworking capable, so we turn off
3234 TARGET_INTERWORK here to avoid many tests later on. */
3236 /* XXX However, we must pass the right pre-processor defines to CPP
3237 or GLD can get confused. This is a hack. */
3238 if (TARGET_INTERWORK
)
3239 arm_cpp_interwork
= 1;
3242 target_flags
&= ~MASK_INTERWORK
;
3244 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3245 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3247 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3248 error ("iwmmxt abi requires an iwmmxt capable cpu");
3250 if (!global_options_set
.x_arm_fpu_index
)
3252 const char *target_fpu_name
;
3255 #ifdef FPUTYPE_DEFAULT
3256 target_fpu_name
= FPUTYPE_DEFAULT
;
3258 target_fpu_name
= "vfp";
3261 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
3266 /* If soft-float is specified then don't use FPU. */
3267 if (TARGET_SOFT_FLOAT
)
3268 arm_fpu_attr
= FPU_NONE
;
3270 arm_fpu_attr
= FPU_VFP
;
3272 if (TARGET_AAPCS_BASED
)
3274 if (TARGET_CALLER_INTERWORKING
)
3275 error ("AAPCS does not support -mcaller-super-interworking");
3277 if (TARGET_CALLEE_INTERWORKING
)
3278 error ("AAPCS does not support -mcallee-super-interworking");
3281 /* __fp16 support currently assumes the core has ldrh. */
3282 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3283 sorry ("__fp16 and no ldrh");
3285 if (TARGET_AAPCS_BASED
)
3287 if (arm_abi
== ARM_ABI_IWMMXT
)
3288 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3289 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
3290 && TARGET_HARD_FLOAT
)
3291 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3293 arm_pcs_default
= ARM_PCS_AAPCS
;
3297 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3298 sorry ("-mfloat-abi=hard and VFP");
3300 if (arm_abi
== ARM_ABI_APCS
)
3301 arm_pcs_default
= ARM_PCS_APCS
;
3303 arm_pcs_default
= ARM_PCS_ATPCS
;
3306 /* For arm2/3 there is no need to do any scheduling if we are doing
3307 software floating-point. */
3308 if (TARGET_SOFT_FLOAT
&& !ARM_FSET_HAS_CPU1 (tune_flags
, FL_MODE32
))
3309 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3311 /* Use the cp15 method if it is available. */
3312 if (target_thread_pointer
== TP_AUTO
)
3314 if (arm_arch6k
&& !TARGET_THUMB1
)
3315 target_thread_pointer
= TP_CP15
;
3317 target_thread_pointer
= TP_SOFT
;
3320 /* Override the default structure alignment for AAPCS ABI. */
3321 if (!global_options_set
.x_arm_structure_size_boundary
)
3323 if (TARGET_AAPCS_BASED
)
3324 arm_structure_size_boundary
= 8;
3328 if (arm_structure_size_boundary
!= 8
3329 && arm_structure_size_boundary
!= 32
3330 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3332 if (ARM_DOUBLEWORD_ALIGN
)
3334 "structure size boundary can only be set to 8, 32 or 64");
3336 warning (0, "structure size boundary can only be set to 8 or 32");
3337 arm_structure_size_boundary
3338 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3342 if (TARGET_VXWORKS_RTP
)
3344 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3345 arm_pic_data_is_text_relative
= 0;
3348 && !arm_pic_data_is_text_relative
3349 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3350 /* When text & data segments don't have a fixed displacement, the
3351 intended use is with a single, read only, pic base register.
3352 Unless the user explicitly requested not to do that, set
3354 target_flags
|= MASK_SINGLE_PIC_BASE
;
3356 /* If stack checking is disabled, we can use r10 as the PIC register,
3357 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3358 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3360 if (TARGET_VXWORKS_RTP
)
3361 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3362 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3365 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3366 arm_pic_register
= 9;
3368 if (arm_pic_register_string
!= NULL
)
3370 int pic_register
= decode_reg_name (arm_pic_register_string
);
3373 warning (0, "-mpic-register= is useless without -fpic");
3375 /* Prevent the user from choosing an obviously stupid PIC register. */
3376 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3377 || pic_register
== HARD_FRAME_POINTER_REGNUM
3378 || pic_register
== STACK_POINTER_REGNUM
3379 || pic_register
>= PC_REGNUM
3380 || (TARGET_VXWORKS_RTP
3381 && (unsigned int) pic_register
!= arm_pic_register
))
3382 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3384 arm_pic_register
= pic_register
;
3387 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3388 if (fix_cm3_ldrd
== 2)
3390 if (arm_selected_cpu
->core
== TARGET_CPU_cortexm3
)
3396 /* Hot/Cold partitioning is not currently supported, since we can't
3397 handle literal pool placement in that case. */
3398 if (flag_reorder_blocks_and_partition
)
3400 inform (input_location
,
3401 "-freorder-blocks-and-partition not supported on this architecture");
3402 flag_reorder_blocks_and_partition
= 0;
3403 flag_reorder_blocks
= 1;
3407 /* Hoisting PIC address calculations more aggressively provides a small,
3408 but measurable, size reduction for PIC code. Therefore, we decrease
3409 the bar for unrestricted expression hoisting to the cost of PIC address
3410 calculation, which is 2 instructions. */
3411 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3412 global_options
.x_param_values
,
3413 global_options_set
.x_param_values
);
3415 /* ARM EABI defaults to strict volatile bitfields. */
3416 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3417 && abi_version_at_least(2))
3418 flag_strict_volatile_bitfields
= 1;
3420 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3421 have deemed it beneficial (signified by setting
3422 prefetch.num_slots to 1 or more). */
3423 if (flag_prefetch_loop_arrays
< 0
3426 && current_tune
->prefetch
.num_slots
> 0)
3427 flag_prefetch_loop_arrays
= 1;
3429 /* Set up parameters to be used in prefetching algorithm. Do not
3430 override the defaults unless we are tuning for a core we have
3431 researched values for. */
3432 if (current_tune
->prefetch
.num_slots
> 0)
3433 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3434 current_tune
->prefetch
.num_slots
,
3435 global_options
.x_param_values
,
3436 global_options_set
.x_param_values
);
3437 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3438 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3439 current_tune
->prefetch
.l1_cache_line_size
,
3440 global_options
.x_param_values
,
3441 global_options_set
.x_param_values
);
3442 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3443 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3444 current_tune
->prefetch
.l1_cache_size
,
3445 global_options
.x_param_values
,
3446 global_options_set
.x_param_values
);
3448 /* Use Neon to perform 64-bits operations rather than core
3450 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3451 if (use_neon_for_64bits
== 1)
3452 prefer_neon_for_64bits
= true;
3454 /* Use the alternative scheduling-pressure algorithm by default. */
3455 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3456 global_options
.x_param_values
,
3457 global_options_set
.x_param_values
);
3459 /* Look through ready list and all of queue for instructions
3460 relevant for L2 auto-prefetcher. */
3461 int param_sched_autopref_queue_depth
;
3463 switch (current_tune
->sched_autopref
)
3465 case tune_params::SCHED_AUTOPREF_OFF
:
3466 param_sched_autopref_queue_depth
= -1;
3469 case tune_params::SCHED_AUTOPREF_RANK
:
3470 param_sched_autopref_queue_depth
= 0;
3473 case tune_params::SCHED_AUTOPREF_FULL
:
3474 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3481 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3482 param_sched_autopref_queue_depth
,
3483 global_options
.x_param_values
,
3484 global_options_set
.x_param_values
);
3486 /* Currently, for slow flash data, we just disable literal pools. We also
3487 disable it for pure-code. */
3488 if (target_slow_flash_data
|| target_pure_code
)
3489 arm_disable_literal_pool
= true;
3491 /* Disable scheduling fusion by default if it's not armv7 processor
3492 or doesn't prefer ldrd/strd. */
3493 if (flag_schedule_fusion
== 2
3494 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3495 flag_schedule_fusion
= 0;
3497 /* Need to remember initial options before they are overriden. */
3498 init_optimize
= build_optimization_node (&global_options
);
3500 arm_option_override_internal (&global_options
, &global_options_set
);
3501 arm_option_check_internal (&global_options
);
3502 arm_option_params_internal ();
3504 /* Register global variables with the garbage collector. */
3505 arm_add_gc_roots ();
3507 /* Save the initial options in case the user does function specific
3508 options or #pragma target. */
3509 target_option_default_node
= target_option_current_node
3510 = build_target_option_node (&global_options
);
3512 /* Init initial mode for testing. */
3513 thumb_flipper
= TARGET_THUMB
;
3517 arm_add_gc_roots (void)
3519 gcc_obstack_init(&minipool_obstack
);
3520 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3523 /* A table of known ARM exception types.
3524 For use with the interrupt function attribute. */
3528 const char *const arg
;
3529 const unsigned long return_value
;
3533 static const isr_attribute_arg isr_attribute_args
[] =
3535 { "IRQ", ARM_FT_ISR
},
3536 { "irq", ARM_FT_ISR
},
3537 { "FIQ", ARM_FT_FIQ
},
3538 { "fiq", ARM_FT_FIQ
},
3539 { "ABORT", ARM_FT_ISR
},
3540 { "abort", ARM_FT_ISR
},
3541 { "ABORT", ARM_FT_ISR
},
3542 { "abort", ARM_FT_ISR
},
3543 { "UNDEF", ARM_FT_EXCEPTION
},
3544 { "undef", ARM_FT_EXCEPTION
},
3545 { "SWI", ARM_FT_EXCEPTION
},
3546 { "swi", ARM_FT_EXCEPTION
},
3547 { NULL
, ARM_FT_NORMAL
}
3550 /* Returns the (interrupt) function type of the current
3551 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3553 static unsigned long
3554 arm_isr_value (tree argument
)
3556 const isr_attribute_arg
* ptr
;
3560 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3562 /* No argument - default to IRQ. */
3563 if (argument
== NULL_TREE
)
3566 /* Get the value of the argument. */
3567 if (TREE_VALUE (argument
) == NULL_TREE
3568 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3569 return ARM_FT_UNKNOWN
;
3571 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3573 /* Check it against the list of known arguments. */
3574 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3575 if (streq (arg
, ptr
->arg
))
3576 return ptr
->return_value
;
3578 /* An unrecognized interrupt type. */
3579 return ARM_FT_UNKNOWN
;
3582 /* Computes the type of the current function. */
3584 static unsigned long
3585 arm_compute_func_type (void)
3587 unsigned long type
= ARM_FT_UNKNOWN
;
3591 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3593 /* Decide if the current function is volatile. Such functions
3594 never return, and many memory cycles can be saved by not storing
3595 register values that will never be needed again. This optimization
3596 was added to speed up context switching in a kernel application. */
3598 && (TREE_NOTHROW (current_function_decl
)
3599 || !(flag_unwind_tables
3601 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3602 && TREE_THIS_VOLATILE (current_function_decl
))
3603 type
|= ARM_FT_VOLATILE
;
3605 if (cfun
->static_chain_decl
!= NULL
)
3606 type
|= ARM_FT_NESTED
;
3608 attr
= DECL_ATTRIBUTES (current_function_decl
);
3610 a
= lookup_attribute ("naked", attr
);
3612 type
|= ARM_FT_NAKED
;
3614 a
= lookup_attribute ("isr", attr
);
3616 a
= lookup_attribute ("interrupt", attr
);
3619 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3621 type
|= arm_isr_value (TREE_VALUE (a
));
3626 /* Returns the type of the current function. */
3629 arm_current_func_type (void)
3631 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3632 cfun
->machine
->func_type
= arm_compute_func_type ();
3634 return cfun
->machine
->func_type
;
3638 arm_allocate_stack_slots_for_args (void)
3640 /* Naked functions should not allocate stack slots for arguments. */
3641 return !IS_NAKED (arm_current_func_type ());
3645 arm_warn_func_return (tree decl
)
3647 /* Naked functions are implemented entirely in assembly, including the
3648 return sequence, so suppress warnings about this. */
3649 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3653 /* Output assembler code for a block containing the constant parts
3654 of a trampoline, leaving space for the variable parts.
3656 On the ARM, (if r8 is the static chain regnum, and remembering that
3657 referencing pc adds an offset of 8) the trampoline looks like:
3660 .word static chain value
3661 .word function's address
3662 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3665 arm_asm_trampoline_template (FILE *f
)
3667 fprintf (f
, "\t.syntax unified\n");
3671 fprintf (f
, "\t.arm\n");
3672 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3673 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3675 else if (TARGET_THUMB2
)
3677 fprintf (f
, "\t.thumb\n");
3678 /* The Thumb-2 trampoline is similar to the arm implementation.
3679 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3680 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3681 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3682 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3686 ASM_OUTPUT_ALIGN (f
, 2);
3687 fprintf (f
, "\t.code\t16\n");
3688 fprintf (f
, ".Ltrampoline_start:\n");
3689 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3690 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3691 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3692 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3693 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3694 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3696 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3697 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3700 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3703 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3705 rtx fnaddr
, mem
, a_tramp
;
3707 emit_block_move (m_tramp
, assemble_trampoline_template (),
3708 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3710 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3711 emit_move_insn (mem
, chain_value
);
3713 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3714 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3715 emit_move_insn (mem
, fnaddr
);
3717 a_tramp
= XEXP (m_tramp
, 0);
3718 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3719 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3720 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3723 /* Thumb trampolines should be entered in thumb mode, so set
3724 the bottom bit of the address. */
3727 arm_trampoline_adjust_address (rtx addr
)
3730 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3731 NULL
, 0, OPTAB_LIB_WIDEN
);
3735 /* Return 1 if it is possible to return using a single instruction.
3736 If SIBLING is non-null, this is a test for a return before a sibling
3737 call. SIBLING is the call insn, so we can examine its register usage. */
3740 use_return_insn (int iscond
, rtx sibling
)
3743 unsigned int func_type
;
3744 unsigned long saved_int_regs
;
3745 unsigned HOST_WIDE_INT stack_adjust
;
3746 arm_stack_offsets
*offsets
;
3748 /* Never use a return instruction before reload has run. */
3749 if (!reload_completed
)
3752 func_type
= arm_current_func_type ();
3754 /* Naked, volatile and stack alignment functions need special
3756 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3759 /* So do interrupt functions that use the frame pointer and Thumb
3760 interrupt functions. */
3761 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3764 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3765 && !optimize_function_for_size_p (cfun
))
3768 offsets
= arm_get_frame_offsets ();
3769 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3771 /* As do variadic functions. */
3772 if (crtl
->args
.pretend_args_size
3773 || cfun
->machine
->uses_anonymous_args
3774 /* Or if the function calls __builtin_eh_return () */
3775 || crtl
->calls_eh_return
3776 /* Or if the function calls alloca */
3777 || cfun
->calls_alloca
3778 /* Or if there is a stack adjustment. However, if the stack pointer
3779 is saved on the stack, we can use a pre-incrementing stack load. */
3780 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3781 && stack_adjust
== 4))
3782 /* Or if the static chain register was saved above the frame, under the
3783 assumption that the stack pointer isn't saved on the stack. */
3784 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
3785 && arm_compute_static_chain_stack_bytes() != 0))
3788 saved_int_regs
= offsets
->saved_regs_mask
;
3790 /* Unfortunately, the insn
3792 ldmib sp, {..., sp, ...}
3794 triggers a bug on most SA-110 based devices, such that the stack
3795 pointer won't be correctly restored if the instruction takes a
3796 page fault. We work around this problem by popping r3 along with
3797 the other registers, since that is never slower than executing
3798 another instruction.
3800 We test for !arm_arch5 here, because code for any architecture
3801 less than this could potentially be run on one of the buggy
3803 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3805 /* Validate that r3 is a call-clobbered register (always true in
3806 the default abi) ... */
3807 if (!call_used_regs
[3])
3810 /* ... that it isn't being used for a return value ... */
3811 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3814 /* ... or for a tail-call argument ... */
3817 gcc_assert (CALL_P (sibling
));
3819 if (find_regno_fusage (sibling
, USE
, 3))
3823 /* ... and that there are no call-saved registers in r0-r2
3824 (always true in the default ABI). */
3825 if (saved_int_regs
& 0x7)
3829 /* Can't be done if interworking with Thumb, and any registers have been
3831 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3834 /* On StrongARM, conditional returns are expensive if they aren't
3835 taken and multiple registers have been stacked. */
3836 if (iscond
&& arm_tune_strongarm
)
3838 /* Conditional return when just the LR is stored is a simple
3839 conditional-load instruction, that's not expensive. */
3840 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3844 && arm_pic_register
!= INVALID_REGNUM
3845 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3849 /* If there are saved registers but the LR isn't saved, then we need
3850 two instructions for the return. */
3851 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3854 /* Can't be done if any of the VFP regs are pushed,
3855 since this also requires an insn. */
3856 if (TARGET_HARD_FLOAT
)
3857 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3858 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3861 if (TARGET_REALLY_IWMMXT
)
3862 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3863 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3869 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3870 shrink-wrapping if possible. This is the case if we need to emit a
3871 prologue, which we can test by looking at the offsets. */
3873 use_simple_return_p (void)
3875 arm_stack_offsets
*offsets
;
3877 offsets
= arm_get_frame_offsets ();
3878 return offsets
->outgoing_args
!= 0;
3881 /* Return TRUE if int I is a valid immediate ARM constant. */
3884 const_ok_for_arm (HOST_WIDE_INT i
)
3888 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3889 be all zero, or all one. */
3890 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3891 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3892 != ((~(unsigned HOST_WIDE_INT
) 0)
3893 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3896 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3898 /* Fast return for 0 and small values. We must do this for zero, since
3899 the code below can't handle that one case. */
3900 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3903 /* Get the number of trailing zeros. */
3904 lowbit
= ffs((int) i
) - 1;
3906 /* Only even shifts are allowed in ARM mode so round down to the
3907 nearest even number. */
3911 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3916 /* Allow rotated constants in ARM mode. */
3918 && ((i
& ~0xc000003f) == 0
3919 || (i
& ~0xf000000f) == 0
3920 || (i
& ~0xfc000003) == 0))
3927 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3930 if (i
== v
|| i
== (v
| (v
<< 8)))
3933 /* Allow repeated pattern 0xXY00XY00. */
3943 /* Return true if I is a valid constant for the operation CODE. */
3945 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3947 if (const_ok_for_arm (i
))
3953 /* See if we can use movw. */
3954 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
3957 /* Otherwise, try mvn. */
3958 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3961 /* See if we can use addw or subw. */
3963 && ((i
& 0xfffff000) == 0
3964 || ((-i
) & 0xfffff000) == 0))
3985 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3987 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3993 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3997 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4004 /* Return true if I is a valid di mode constant for the operation CODE. */
4006 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4008 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4009 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4010 rtx hi
= GEN_INT (hi_val
);
4011 rtx lo
= GEN_INT (lo_val
);
4021 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4022 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4024 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4031 /* Emit a sequence of insns to handle a large constant.
4032 CODE is the code of the operation required, it can be any of SET, PLUS,
4033 IOR, AND, XOR, MINUS;
4034 MODE is the mode in which the operation is being performed;
4035 VAL is the integer to operate on;
4036 SOURCE is the other operand (a register, or a null-pointer for SET);
4037 SUBTARGETS means it is safe to create scratch registers if that will
4038 either produce a simpler sequence, or we will want to cse the values.
4039 Return value is the number of insns emitted. */
4041 /* ??? Tweak this for thumb2. */
4043 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4044 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4048 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4049 cond
= COND_EXEC_TEST (PATTERN (insn
));
4053 if (subtargets
|| code
== SET
4054 || (REG_P (target
) && REG_P (source
)
4055 && REGNO (target
) != REGNO (source
)))
4057 /* After arm_reorg has been called, we can't fix up expensive
4058 constants by pushing them into memory so we must synthesize
4059 them in-line, regardless of the cost. This is only likely to
4060 be more costly on chips that have load delay slots and we are
4061 compiling without running the scheduler (so no splitting
4062 occurred before the final instruction emission).
4064 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4066 if (!cfun
->machine
->after_arm_reorg
4068 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4070 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4075 /* Currently SET is the only monadic value for CODE, all
4076 the rest are diadic. */
4077 if (TARGET_USE_MOVT
)
4078 arm_emit_movpair (target
, GEN_INT (val
));
4080 emit_set_insn (target
, GEN_INT (val
));
4086 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4088 if (TARGET_USE_MOVT
)
4089 arm_emit_movpair (temp
, GEN_INT (val
));
4091 emit_set_insn (temp
, GEN_INT (val
));
4093 /* For MINUS, the value is subtracted from, since we never
4094 have subtraction of a constant. */
4096 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4098 emit_set_insn (target
,
4099 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4105 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4109 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4110 ARM/THUMB2 immediates, and add up to VAL.
4111 Thr function return value gives the number of insns required. */
4113 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4114 struct four_ints
*return_sequence
)
4116 int best_consecutive_zeros
= 0;
4120 struct four_ints tmp_sequence
;
4122 /* If we aren't targeting ARM, the best place to start is always at
4123 the bottom, otherwise look more closely. */
4126 for (i
= 0; i
< 32; i
+= 2)
4128 int consecutive_zeros
= 0;
4130 if (!(val
& (3 << i
)))
4132 while ((i
< 32) && !(val
& (3 << i
)))
4134 consecutive_zeros
+= 2;
4137 if (consecutive_zeros
> best_consecutive_zeros
)
4139 best_consecutive_zeros
= consecutive_zeros
;
4140 best_start
= i
- consecutive_zeros
;
4147 /* So long as it won't require any more insns to do so, it's
4148 desirable to emit a small constant (in bits 0...9) in the last
4149 insn. This way there is more chance that it can be combined with
4150 a later addressing insn to form a pre-indexed load or store
4151 operation. Consider:
4153 *((volatile int *)0xe0000100) = 1;
4154 *((volatile int *)0xe0000110) = 2;
4156 We want this to wind up as:
4160 str rB, [rA, #0x100]
4162 str rB, [rA, #0x110]
4164 rather than having to synthesize both large constants from scratch.
4166 Therefore, we calculate how many insns would be required to emit
4167 the constant starting from `best_start', and also starting from
4168 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4169 yield a shorter sequence, we may as well use zero. */
4170 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4172 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4174 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4175 if (insns2
<= insns1
)
4177 *return_sequence
= tmp_sequence
;
4185 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4187 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4188 struct four_ints
*return_sequence
, int i
)
4190 int remainder
= val
& 0xffffffff;
4193 /* Try and find a way of doing the job in either two or three
4196 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4197 location. We start at position I. This may be the MSB, or
4198 optimial_immediate_sequence may have positioned it at the largest block
4199 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4200 wrapping around to the top of the word when we drop off the bottom.
4201 In the worst case this code should produce no more than four insns.
4203 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4204 constants, shifted to any arbitrary location. We should always start
4209 unsigned int b1
, b2
, b3
, b4
;
4210 unsigned HOST_WIDE_INT result
;
4213 gcc_assert (insns
< 4);
4218 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4219 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4222 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4223 /* We can use addw/subw for the last 12 bits. */
4227 /* Use an 8-bit shifted/rotated immediate. */
4231 result
= remainder
& ((0x0ff << end
)
4232 | ((i
< end
) ? (0xff >> (32 - end
))
4239 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4240 arbitrary shifts. */
4241 i
-= TARGET_ARM
? 2 : 1;
4245 /* Next, see if we can do a better job with a thumb2 replicated
4248 We do it this way around to catch the cases like 0x01F001E0 where
4249 two 8-bit immediates would work, but a replicated constant would
4252 TODO: 16-bit constants that don't clear all the bits, but still win.
4253 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4256 b1
= (remainder
& 0xff000000) >> 24;
4257 b2
= (remainder
& 0x00ff0000) >> 16;
4258 b3
= (remainder
& 0x0000ff00) >> 8;
4259 b4
= remainder
& 0xff;
4263 /* The 8-bit immediate already found clears b1 (and maybe b2),
4264 but must leave b3 and b4 alone. */
4266 /* First try to find a 32-bit replicated constant that clears
4267 almost everything. We can assume that we can't do it in one,
4268 or else we wouldn't be here. */
4269 unsigned int tmp
= b1
& b2
& b3
& b4
;
4270 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4272 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4273 + (tmp
== b3
) + (tmp
== b4
);
4275 && (matching_bytes
>= 3
4276 || (matching_bytes
== 2
4277 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4279 /* At least 3 of the bytes match, and the fourth has at
4280 least as many bits set, or two of the bytes match
4281 and it will only require one more insn to finish. */
4289 /* Second, try to find a 16-bit replicated constant that can
4290 leave three of the bytes clear. If b2 or b4 is already
4291 zero, then we can. If the 8-bit from above would not
4292 clear b2 anyway, then we still win. */
4293 else if (b1
== b3
&& (!b2
|| !b4
4294 || (remainder
& 0x00ff0000 & ~result
)))
4296 result
= remainder
& 0xff00ff00;
4302 /* The 8-bit immediate already found clears b2 (and maybe b3)
4303 and we don't get here unless b1 is alredy clear, but it will
4304 leave b4 unchanged. */
4306 /* If we can clear b2 and b4 at once, then we win, since the
4307 8-bits couldn't possibly reach that far. */
4310 result
= remainder
& 0x00ff00ff;
4316 return_sequence
->i
[insns
++] = result
;
4317 remainder
&= ~result
;
4319 if (code
== SET
|| code
== MINUS
)
4327 /* Emit an instruction with the indicated PATTERN. If COND is
4328 non-NULL, conditionalize the execution of the instruction on COND
4332 emit_constant_insn (rtx cond
, rtx pattern
)
4335 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4336 emit_insn (pattern
);
4339 /* As above, but extra parameter GENERATE which, if clear, suppresses
4343 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4344 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4345 int subtargets
, int generate
)
4349 int final_invert
= 0;
4351 int set_sign_bit_copies
= 0;
4352 int clear_sign_bit_copies
= 0;
4353 int clear_zero_bit_copies
= 0;
4354 int set_zero_bit_copies
= 0;
4355 int insns
= 0, neg_insns
, inv_insns
;
4356 unsigned HOST_WIDE_INT temp1
, temp2
;
4357 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4358 struct four_ints
*immediates
;
4359 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4361 /* Find out which operations are safe for a given CODE. Also do a quick
4362 check for degenerate cases; these can occur when DImode operations
4375 if (remainder
== 0xffffffff)
4378 emit_constant_insn (cond
,
4379 gen_rtx_SET (target
,
4380 GEN_INT (ARM_SIGN_EXTEND (val
))));
4386 if (reload_completed
&& rtx_equal_p (target
, source
))
4390 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4399 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4402 if (remainder
== 0xffffffff)
4404 if (reload_completed
&& rtx_equal_p (target
, source
))
4407 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4416 if (reload_completed
&& rtx_equal_p (target
, source
))
4419 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4423 if (remainder
== 0xffffffff)
4426 emit_constant_insn (cond
,
4427 gen_rtx_SET (target
,
4428 gen_rtx_NOT (mode
, source
)));
4435 /* We treat MINUS as (val - source), since (source - val) is always
4436 passed as (source + (-val)). */
4440 emit_constant_insn (cond
,
4441 gen_rtx_SET (target
,
4442 gen_rtx_NEG (mode
, source
)));
4445 if (const_ok_for_arm (val
))
4448 emit_constant_insn (cond
,
4449 gen_rtx_SET (target
,
4450 gen_rtx_MINUS (mode
, GEN_INT (val
),
4461 /* If we can do it in one insn get out quickly. */
4462 if (const_ok_for_op (val
, code
))
4465 emit_constant_insn (cond
,
4466 gen_rtx_SET (target
,
4468 ? gen_rtx_fmt_ee (code
, mode
, source
,
4474 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4476 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4477 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4481 if (mode
== SImode
&& i
== 16)
4482 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4484 emit_constant_insn (cond
,
4485 gen_zero_extendhisi2
4486 (target
, gen_lowpart (HImode
, source
)));
4488 /* Extz only supports SImode, but we can coerce the operands
4490 emit_constant_insn (cond
,
4491 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4492 gen_lowpart (SImode
, source
),
4493 GEN_INT (i
), const0_rtx
));
4499 /* Calculate a few attributes that may be useful for specific
4501 /* Count number of leading zeros. */
4502 for (i
= 31; i
>= 0; i
--)
4504 if ((remainder
& (1 << i
)) == 0)
4505 clear_sign_bit_copies
++;
4510 /* Count number of leading 1's. */
4511 for (i
= 31; i
>= 0; i
--)
4513 if ((remainder
& (1 << i
)) != 0)
4514 set_sign_bit_copies
++;
4519 /* Count number of trailing zero's. */
4520 for (i
= 0; i
<= 31; i
++)
4522 if ((remainder
& (1 << i
)) == 0)
4523 clear_zero_bit_copies
++;
4528 /* Count number of trailing 1's. */
4529 for (i
= 0; i
<= 31; i
++)
4531 if ((remainder
& (1 << i
)) != 0)
4532 set_zero_bit_copies
++;
4540 /* See if we can do this by sign_extending a constant that is known
4541 to be negative. This is a good, way of doing it, since the shift
4542 may well merge into a subsequent insn. */
4543 if (set_sign_bit_copies
> 1)
4545 if (const_ok_for_arm
4546 (temp1
= ARM_SIGN_EXTEND (remainder
4547 << (set_sign_bit_copies
- 1))))
4551 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4552 emit_constant_insn (cond
,
4553 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4554 emit_constant_insn (cond
,
4555 gen_ashrsi3 (target
, new_src
,
4556 GEN_INT (set_sign_bit_copies
- 1)));
4560 /* For an inverted constant, we will need to set the low bits,
4561 these will be shifted out of harm's way. */
4562 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4563 if (const_ok_for_arm (~temp1
))
4567 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4568 emit_constant_insn (cond
,
4569 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4570 emit_constant_insn (cond
,
4571 gen_ashrsi3 (target
, new_src
,
4572 GEN_INT (set_sign_bit_copies
- 1)));
4578 /* See if we can calculate the value as the difference between two
4579 valid immediates. */
4580 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4582 int topshift
= clear_sign_bit_copies
& ~1;
4584 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4585 & (0xff000000 >> topshift
));
4587 /* If temp1 is zero, then that means the 9 most significant
4588 bits of remainder were 1 and we've caused it to overflow.
4589 When topshift is 0 we don't need to do anything since we
4590 can borrow from 'bit 32'. */
4591 if (temp1
== 0 && topshift
!= 0)
4592 temp1
= 0x80000000 >> (topshift
- 1);
4594 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4596 if (const_ok_for_arm (temp2
))
4600 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4601 emit_constant_insn (cond
,
4602 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4603 emit_constant_insn (cond
,
4604 gen_addsi3 (target
, new_src
,
4612 /* See if we can generate this by setting the bottom (or the top)
4613 16 bits, and then shifting these into the other half of the
4614 word. We only look for the simplest cases, to do more would cost
4615 too much. Be careful, however, not to generate this when the
4616 alternative would take fewer insns. */
4617 if (val
& 0xffff0000)
4619 temp1
= remainder
& 0xffff0000;
4620 temp2
= remainder
& 0x0000ffff;
4622 /* Overlaps outside this range are best done using other methods. */
4623 for (i
= 9; i
< 24; i
++)
4625 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4626 && !const_ok_for_arm (temp2
))
4628 rtx new_src
= (subtargets
4629 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4631 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4632 source
, subtargets
, generate
);
4640 gen_rtx_ASHIFT (mode
, source
,
4647 /* Don't duplicate cases already considered. */
4648 for (i
= 17; i
< 24; i
++)
4650 if (((temp1
| (temp1
>> i
)) == remainder
)
4651 && !const_ok_for_arm (temp1
))
4653 rtx new_src
= (subtargets
4654 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4656 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4657 source
, subtargets
, generate
);
4662 gen_rtx_SET (target
,
4665 gen_rtx_LSHIFTRT (mode
, source
,
4676 /* If we have IOR or XOR, and the constant can be loaded in a
4677 single instruction, and we can find a temporary to put it in,
4678 then this can be done in two instructions instead of 3-4. */
4680 /* TARGET can't be NULL if SUBTARGETS is 0 */
4681 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4683 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4687 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4689 emit_constant_insn (cond
,
4690 gen_rtx_SET (sub
, GEN_INT (val
)));
4691 emit_constant_insn (cond
,
4692 gen_rtx_SET (target
,
4693 gen_rtx_fmt_ee (code
, mode
,
4704 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4705 and the remainder 0s for e.g. 0xfff00000)
4706 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4708 This can be done in 2 instructions by using shifts with mov or mvn.
4713 mvn r0, r0, lsr #12 */
4714 if (set_sign_bit_copies
> 8
4715 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4719 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4720 rtx shift
= GEN_INT (set_sign_bit_copies
);
4726 gen_rtx_ASHIFT (mode
,
4731 gen_rtx_SET (target
,
4733 gen_rtx_LSHIFTRT (mode
, sub
,
4740 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4742 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4744 For eg. r0 = r0 | 0xfff
4749 if (set_zero_bit_copies
> 8
4750 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4754 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4755 rtx shift
= GEN_INT (set_zero_bit_copies
);
4761 gen_rtx_LSHIFTRT (mode
,
4766 gen_rtx_SET (target
,
4768 gen_rtx_ASHIFT (mode
, sub
,
4774 /* This will never be reached for Thumb2 because orn is a valid
4775 instruction. This is for Thumb1 and the ARM 32 bit cases.
4777 x = y | constant (such that ~constant is a valid constant)
4779 x = ~(~y & ~constant).
4781 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4785 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4786 emit_constant_insn (cond
,
4788 gen_rtx_NOT (mode
, source
)));
4791 sub
= gen_reg_rtx (mode
);
4792 emit_constant_insn (cond
,
4794 gen_rtx_AND (mode
, source
,
4796 emit_constant_insn (cond
,
4797 gen_rtx_SET (target
,
4798 gen_rtx_NOT (mode
, sub
)));
4805 /* See if two shifts will do 2 or more insn's worth of work. */
4806 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4808 HOST_WIDE_INT shift_mask
= ((0xffffffff
4809 << (32 - clear_sign_bit_copies
))
4812 if ((remainder
| shift_mask
) != 0xffffffff)
4814 HOST_WIDE_INT new_val
4815 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4819 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4820 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4821 new_src
, source
, subtargets
, 1);
4826 rtx targ
= subtargets
? NULL_RTX
: target
;
4827 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4828 targ
, source
, subtargets
, 0);
4834 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4835 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4837 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4838 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4844 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4846 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4848 if ((remainder
| shift_mask
) != 0xffffffff)
4850 HOST_WIDE_INT new_val
4851 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4854 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4856 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4857 new_src
, source
, subtargets
, 1);
4862 rtx targ
= subtargets
? NULL_RTX
: target
;
4864 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4865 targ
, source
, subtargets
, 0);
4871 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4872 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4874 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4875 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4887 /* Calculate what the instruction sequences would be if we generated it
4888 normally, negated, or inverted. */
4890 /* AND cannot be split into multiple insns, so invert and use BIC. */
4893 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4896 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4901 if (can_invert
|| final_invert
)
4902 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4907 immediates
= &pos_immediates
;
4909 /* Is the negated immediate sequence more efficient? */
4910 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4913 immediates
= &neg_immediates
;
4918 /* Is the inverted immediate sequence more efficient?
4919 We must allow for an extra NOT instruction for XOR operations, although
4920 there is some chance that the final 'mvn' will get optimized later. */
4921 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4924 immediates
= &inv_immediates
;
4932 /* Now output the chosen sequence as instructions. */
4935 for (i
= 0; i
< insns
; i
++)
4937 rtx new_src
, temp1_rtx
;
4939 temp1
= immediates
->i
[i
];
4941 if (code
== SET
|| code
== MINUS
)
4942 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4943 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4944 new_src
= gen_reg_rtx (mode
);
4950 else if (can_negate
)
4953 temp1
= trunc_int_for_mode (temp1
, mode
);
4954 temp1_rtx
= GEN_INT (temp1
);
4958 else if (code
== MINUS
)
4959 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4961 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4963 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
4968 can_negate
= can_invert
;
4972 else if (code
== MINUS
)
4980 emit_constant_insn (cond
, gen_rtx_SET (target
,
4981 gen_rtx_NOT (mode
, source
)));
4988 /* Canonicalize a comparison so that we are more likely to recognize it.
4989 This can be done for a few constant compares, where we can make the
4990 immediate value easier to load. */
4993 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4994 bool op0_preserve_value
)
4997 unsigned HOST_WIDE_INT i
, maxval
;
4999 mode
= GET_MODE (*op0
);
5000 if (mode
== VOIDmode
)
5001 mode
= GET_MODE (*op1
);
5003 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5005 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5006 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5007 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5008 for GTU/LEU in Thumb mode. */
5012 if (*code
== GT
|| *code
== LE
5013 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5015 /* Missing comparison. First try to use an available
5017 if (CONST_INT_P (*op1
))
5025 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5027 *op1
= GEN_INT (i
+ 1);
5028 *code
= *code
== GT
? GE
: LT
;
5034 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5035 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5037 *op1
= GEN_INT (i
+ 1);
5038 *code
= *code
== GTU
? GEU
: LTU
;
5047 /* If that did not work, reverse the condition. */
5048 if (!op0_preserve_value
)
5050 std::swap (*op0
, *op1
);
5051 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5057 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5058 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5059 to facilitate possible combining with a cmp into 'ands'. */
5061 && GET_CODE (*op0
) == ZERO_EXTEND
5062 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5063 && GET_MODE (XEXP (*op0
, 0)) == QImode
5064 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5065 && subreg_lowpart_p (XEXP (*op0
, 0))
5066 && *op1
== const0_rtx
)
5067 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5070 /* Comparisons smaller than DImode. Only adjust comparisons against
5071 an out-of-range constant. */
5072 if (!CONST_INT_P (*op1
)
5073 || const_ok_for_arm (INTVAL (*op1
))
5074 || const_ok_for_arm (- INTVAL (*op1
)))
5088 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5090 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5091 *code
= *code
== GT
? GE
: LT
;
5099 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5101 *op1
= GEN_INT (i
- 1);
5102 *code
= *code
== GE
? GT
: LE
;
5109 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5110 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5112 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5113 *code
= *code
== GTU
? GEU
: LTU
;
5121 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5123 *op1
= GEN_INT (i
- 1);
5124 *code
= *code
== GEU
? GTU
: LEU
;
5135 /* Define how to find the value returned by a function. */
5138 arm_function_value(const_tree type
, const_tree func
,
5139 bool outgoing ATTRIBUTE_UNUSED
)
5142 int unsignedp ATTRIBUTE_UNUSED
;
5143 rtx r ATTRIBUTE_UNUSED
;
5145 mode
= TYPE_MODE (type
);
5147 if (TARGET_AAPCS_BASED
)
5148 return aapcs_allocate_return_reg (mode
, type
, func
);
5150 /* Promote integer types. */
5151 if (INTEGRAL_TYPE_P (type
))
5152 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5154 /* Promotes small structs returned in a register to full-word size
5155 for big-endian AAPCS. */
5156 if (arm_return_in_msb (type
))
5158 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5159 if (size
% UNITS_PER_WORD
!= 0)
5161 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5162 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5166 return arm_libcall_value_1 (mode
);
5169 /* libcall hashtable helpers. */
5171 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5173 static inline hashval_t
hash (const rtx_def
*);
5174 static inline bool equal (const rtx_def
*, const rtx_def
*);
5175 static inline void remove (rtx_def
*);
5179 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5181 return rtx_equal_p (p1
, p2
);
5185 libcall_hasher::hash (const rtx_def
*p1
)
5187 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5190 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5193 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5195 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5199 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5201 static bool init_done
= false;
5202 static libcall_table_type
*libcall_htab
= NULL
;
5208 libcall_htab
= new libcall_table_type (31);
5209 add_libcall (libcall_htab
,
5210 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5211 add_libcall (libcall_htab
,
5212 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5213 add_libcall (libcall_htab
,
5214 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5215 add_libcall (libcall_htab
,
5216 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5218 add_libcall (libcall_htab
,
5219 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5220 add_libcall (libcall_htab
,
5221 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5222 add_libcall (libcall_htab
,
5223 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5224 add_libcall (libcall_htab
,
5225 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5227 add_libcall (libcall_htab
,
5228 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5229 add_libcall (libcall_htab
,
5230 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5231 add_libcall (libcall_htab
,
5232 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5233 add_libcall (libcall_htab
,
5234 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5235 add_libcall (libcall_htab
,
5236 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5237 add_libcall (libcall_htab
,
5238 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5239 add_libcall (libcall_htab
,
5240 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5241 add_libcall (libcall_htab
,
5242 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5244 /* Values from double-precision helper functions are returned in core
5245 registers if the selected core only supports single-precision
5246 arithmetic, even if we are using the hard-float ABI. The same is
5247 true for single-precision helpers, but we will never be using the
5248 hard-float ABI on a CPU which doesn't support single-precision
5249 operations in hardware. */
5250 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5251 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5252 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5253 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5254 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5255 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5256 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5257 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5258 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5259 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5260 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5261 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5263 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5267 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5271 arm_libcall_value_1 (machine_mode mode
)
5273 if (TARGET_AAPCS_BASED
)
5274 return aapcs_libcall_value (mode
);
5275 else if (TARGET_IWMMXT_ABI
5276 && arm_vector_mode_supported_p (mode
))
5277 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5279 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5282 /* Define how to find the value returned by a library function
5283 assuming the value has mode MODE. */
5286 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5288 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5289 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5291 /* The following libcalls return their result in integer registers,
5292 even though they return a floating point value. */
5293 if (arm_libcall_uses_aapcs_base (libcall
))
5294 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5298 return arm_libcall_value_1 (mode
);
5301 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5304 arm_function_value_regno_p (const unsigned int regno
)
5306 if (regno
== ARG_REGISTER (1)
5308 && TARGET_AAPCS_BASED
5309 && TARGET_HARD_FLOAT
5310 && regno
== FIRST_VFP_REGNUM
)
5311 || (TARGET_IWMMXT_ABI
5312 && regno
== FIRST_IWMMXT_REGNUM
))
5318 /* Determine the amount of memory needed to store the possible return
5319 registers of an untyped call. */
5321 arm_apply_result_size (void)
5327 if (TARGET_HARD_FLOAT_ABI
)
5329 if (TARGET_IWMMXT_ABI
)
5336 /* Decide whether TYPE should be returned in memory (true)
5337 or in a register (false). FNTYPE is the type of the function making
5340 arm_return_in_memory (const_tree type
, const_tree fntype
)
5344 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5346 if (TARGET_AAPCS_BASED
)
5348 /* Simple, non-aggregate types (ie not including vectors and
5349 complex) are always returned in a register (or registers).
5350 We don't care about which register here, so we can short-cut
5351 some of the detail. */
5352 if (!AGGREGATE_TYPE_P (type
)
5353 && TREE_CODE (type
) != VECTOR_TYPE
5354 && TREE_CODE (type
) != COMPLEX_TYPE
)
5357 /* Any return value that is no larger than one word can be
5359 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5362 /* Check any available co-processors to see if they accept the
5363 type as a register candidate (VFP, for example, can return
5364 some aggregates in consecutive registers). These aren't
5365 available if the call is variadic. */
5366 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5369 /* Vector values should be returned using ARM registers, not
5370 memory (unless they're over 16 bytes, which will break since
5371 we only have four call-clobbered registers to play with). */
5372 if (TREE_CODE (type
) == VECTOR_TYPE
)
5373 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5375 /* The rest go in memory. */
5379 if (TREE_CODE (type
) == VECTOR_TYPE
)
5380 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5382 if (!AGGREGATE_TYPE_P (type
) &&
5383 (TREE_CODE (type
) != VECTOR_TYPE
))
5384 /* All simple types are returned in registers. */
5387 if (arm_abi
!= ARM_ABI_APCS
)
5389 /* ATPCS and later return aggregate types in memory only if they are
5390 larger than a word (or are variable size). */
5391 return (size
< 0 || size
> UNITS_PER_WORD
);
5394 /* For the arm-wince targets we choose to be compatible with Microsoft's
5395 ARM and Thumb compilers, which always return aggregates in memory. */
5397 /* All structures/unions bigger than one word are returned in memory.
5398 Also catch the case where int_size_in_bytes returns -1. In this case
5399 the aggregate is either huge or of variable size, and in either case
5400 we will want to return it via memory and not in a register. */
5401 if (size
< 0 || size
> UNITS_PER_WORD
)
5404 if (TREE_CODE (type
) == RECORD_TYPE
)
5408 /* For a struct the APCS says that we only return in a register
5409 if the type is 'integer like' and every addressable element
5410 has an offset of zero. For practical purposes this means
5411 that the structure can have at most one non bit-field element
5412 and that this element must be the first one in the structure. */
5414 /* Find the first field, ignoring non FIELD_DECL things which will
5415 have been created by C++. */
5416 for (field
= TYPE_FIELDS (type
);
5417 field
&& TREE_CODE (field
) != FIELD_DECL
;
5418 field
= DECL_CHAIN (field
))
5422 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5424 /* Check that the first field is valid for returning in a register. */
5426 /* ... Floats are not allowed */
5427 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5430 /* ... Aggregates that are not themselves valid for returning in
5431 a register are not allowed. */
5432 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5435 /* Now check the remaining fields, if any. Only bitfields are allowed,
5436 since they are not addressable. */
5437 for (field
= DECL_CHAIN (field
);
5439 field
= DECL_CHAIN (field
))
5441 if (TREE_CODE (field
) != FIELD_DECL
)
5444 if (!DECL_BIT_FIELD_TYPE (field
))
5451 if (TREE_CODE (type
) == UNION_TYPE
)
5455 /* Unions can be returned in registers if every element is
5456 integral, or can be returned in an integer register. */
5457 for (field
= TYPE_FIELDS (type
);
5459 field
= DECL_CHAIN (field
))
5461 if (TREE_CODE (field
) != FIELD_DECL
)
5464 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5467 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5473 #endif /* not ARM_WINCE */
5475 /* Return all other types in memory. */
5479 const struct pcs_attribute_arg
5483 } pcs_attribute_args
[] =
5485 {"aapcs", ARM_PCS_AAPCS
},
5486 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5488 /* We could recognize these, but changes would be needed elsewhere
5489 * to implement them. */
5490 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5491 {"atpcs", ARM_PCS_ATPCS
},
5492 {"apcs", ARM_PCS_APCS
},
5494 {NULL
, ARM_PCS_UNKNOWN
}
5498 arm_pcs_from_attribute (tree attr
)
5500 const struct pcs_attribute_arg
*ptr
;
5503 /* Get the value of the argument. */
5504 if (TREE_VALUE (attr
) == NULL_TREE
5505 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5506 return ARM_PCS_UNKNOWN
;
5508 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5510 /* Check it against the list of known arguments. */
5511 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5512 if (streq (arg
, ptr
->arg
))
5515 /* An unrecognized interrupt type. */
5516 return ARM_PCS_UNKNOWN
;
5519 /* Get the PCS variant to use for this call. TYPE is the function's type
5520 specification, DECL is the specific declartion. DECL may be null if
5521 the call could be indirect or if this is a library call. */
5523 arm_get_pcs_model (const_tree type
, const_tree decl
)
5525 bool user_convention
= false;
5526 enum arm_pcs user_pcs
= arm_pcs_default
;
5531 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5534 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5535 user_convention
= true;
5538 if (TARGET_AAPCS_BASED
)
5540 /* Detect varargs functions. These always use the base rules
5541 (no argument is ever a candidate for a co-processor
5543 bool base_rules
= stdarg_p (type
);
5545 if (user_convention
)
5547 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5548 sorry ("non-AAPCS derived PCS variant");
5549 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5550 error ("variadic functions must use the base AAPCS variant");
5554 return ARM_PCS_AAPCS
;
5555 else if (user_convention
)
5557 else if (decl
&& flag_unit_at_a_time
)
5559 /* Local functions never leak outside this compilation unit,
5560 so we are free to use whatever conventions are
5562 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5563 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5565 return ARM_PCS_AAPCS_LOCAL
;
5568 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5569 sorry ("PCS variant");
5571 /* For everything else we use the target's default. */
5572 return arm_pcs_default
;
5577 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5578 const_tree fntype ATTRIBUTE_UNUSED
,
5579 rtx libcall ATTRIBUTE_UNUSED
,
5580 const_tree fndecl ATTRIBUTE_UNUSED
)
5582 /* Record the unallocated VFP registers. */
5583 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5584 pcum
->aapcs_vfp_reg_alloc
= 0;
5587 /* Walk down the type tree of TYPE counting consecutive base elements.
5588 If *MODEP is VOIDmode, then set it to the first valid floating point
5589 type. If a non-floating point type is found, or if a floating point
5590 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5591 otherwise return the count in the sub-tree. */
5593 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5598 switch (TREE_CODE (type
))
5601 mode
= TYPE_MODE (type
);
5602 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5605 if (*modep
== VOIDmode
)
5614 mode
= TYPE_MODE (TREE_TYPE (type
));
5615 if (mode
!= DFmode
&& mode
!= SFmode
)
5618 if (*modep
== VOIDmode
)
5627 /* Use V2SImode and V4SImode as representatives of all 64-bit
5628 and 128-bit vector types, whether or not those modes are
5629 supported with the present options. */
5630 size
= int_size_in_bytes (type
);
5643 if (*modep
== VOIDmode
)
5646 /* Vector modes are considered to be opaque: two vectors are
5647 equivalent for the purposes of being homogeneous aggregates
5648 if they are the same size. */
5657 tree index
= TYPE_DOMAIN (type
);
5659 /* Can't handle incomplete types nor sizes that are not
5661 if (!COMPLETE_TYPE_P (type
)
5662 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5665 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5668 || !TYPE_MAX_VALUE (index
)
5669 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5670 || !TYPE_MIN_VALUE (index
)
5671 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5675 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5676 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5678 /* There must be no padding. */
5679 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5691 /* Can't handle incomplete types nor sizes that are not
5693 if (!COMPLETE_TYPE_P (type
)
5694 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5697 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5699 if (TREE_CODE (field
) != FIELD_DECL
)
5702 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5708 /* There must be no padding. */
5709 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5716 case QUAL_UNION_TYPE
:
5718 /* These aren't very interesting except in a degenerate case. */
5723 /* Can't handle incomplete types nor sizes that are not
5725 if (!COMPLETE_TYPE_P (type
)
5726 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5729 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5731 if (TREE_CODE (field
) != FIELD_DECL
)
5734 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5737 count
= count
> sub_count
? count
: sub_count
;
5740 /* There must be no padding. */
5741 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5754 /* Return true if PCS_VARIANT should use VFP registers. */
5756 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5758 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5760 static bool seen_thumb1_vfp
= false;
5762 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5764 sorry ("Thumb-1 hard-float VFP ABI");
5765 /* sorry() is not immediately fatal, so only display this once. */
5766 seen_thumb1_vfp
= true;
5772 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5775 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
5776 (TARGET_VFP_DOUBLE
|| !is_double
));
5779 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5780 suitable for passing or returning in VFP registers for the PCS
5781 variant selected. If it is, then *BASE_MODE is updated to contain
5782 a machine mode describing each element of the argument's type and
5783 *COUNT to hold the number of such elements. */
5785 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5786 machine_mode mode
, const_tree type
,
5787 machine_mode
*base_mode
, int *count
)
5789 machine_mode new_mode
= VOIDmode
;
5791 /* If we have the type information, prefer that to working things
5792 out from the mode. */
5795 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5797 if (ag_count
> 0 && ag_count
<= 4)
5802 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5803 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5804 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5809 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5812 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5818 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5821 *base_mode
= new_mode
;
5826 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5827 machine_mode mode
, const_tree type
)
5829 int count ATTRIBUTE_UNUSED
;
5830 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5832 if (!use_vfp_abi (pcs_variant
, false))
5834 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5839 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5842 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5845 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5846 &pcum
->aapcs_vfp_rmode
,
5847 &pcum
->aapcs_vfp_rcount
);
5850 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5851 for the behaviour of this function. */
5854 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5855 const_tree type ATTRIBUTE_UNUSED
)
5858 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
5859 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
5860 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5863 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5864 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5866 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5868 || (mode
== TImode
&& ! TARGET_NEON
)
5869 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5872 int rcount
= pcum
->aapcs_vfp_rcount
;
5874 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5878 /* Avoid using unsupported vector modes. */
5879 if (rmode
== V2SImode
)
5881 else if (rmode
== V4SImode
)
5888 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5889 for (i
= 0; i
< rcount
; i
++)
5891 rtx tmp
= gen_rtx_REG (rmode
,
5892 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5893 tmp
= gen_rtx_EXPR_LIST
5895 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5896 XVECEXP (par
, 0, i
) = tmp
;
5899 pcum
->aapcs_reg
= par
;
5902 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5908 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
5909 comment there for the behaviour of this function. */
5912 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5914 const_tree type ATTRIBUTE_UNUSED
)
5916 if (!use_vfp_abi (pcs_variant
, false))
5920 || (GET_MODE_CLASS (mode
) == MODE_INT
5921 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
5925 machine_mode ag_mode
;
5930 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5935 if (ag_mode
== V2SImode
)
5937 else if (ag_mode
== V4SImode
)
5943 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5944 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5945 for (i
= 0; i
< count
; i
++)
5947 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5948 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5949 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5950 XVECEXP (par
, 0, i
) = tmp
;
5956 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5960 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5961 machine_mode mode ATTRIBUTE_UNUSED
,
5962 const_tree type ATTRIBUTE_UNUSED
)
5964 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5965 pcum
->aapcs_vfp_reg_alloc
= 0;
5969 #define AAPCS_CP(X) \
5971 aapcs_ ## X ## _cum_init, \
5972 aapcs_ ## X ## _is_call_candidate, \
5973 aapcs_ ## X ## _allocate, \
5974 aapcs_ ## X ## _is_return_candidate, \
5975 aapcs_ ## X ## _allocate_return_reg, \
5976 aapcs_ ## X ## _advance \
5979 /* Table of co-processors that can be used to pass arguments in
5980 registers. Idealy no arugment should be a candidate for more than
5981 one co-processor table entry, but the table is processed in order
5982 and stops after the first match. If that entry then fails to put
5983 the argument into a co-processor register, the argument will go on
5987 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5988 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5990 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5991 BLKmode) is a candidate for this co-processor's registers; this
5992 function should ignore any position-dependent state in
5993 CUMULATIVE_ARGS and only use call-type dependent information. */
5994 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5996 /* Return true if the argument does get a co-processor register; it
5997 should set aapcs_reg to an RTX of the register allocated as is
5998 required for a return from FUNCTION_ARG. */
5999 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6001 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6002 be returned in this co-processor's registers. */
6003 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6005 /* Allocate and return an RTX element to hold the return type of a call. This
6006 routine must not fail and will only be called if is_return_candidate
6007 returned true with the same parameters. */
6008 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6010 /* Finish processing this argument and prepare to start processing
6012 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6013 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6021 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6026 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6027 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6034 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6036 /* We aren't passed a decl, so we can't check that a call is local.
6037 However, it isn't clear that that would be a win anyway, since it
6038 might limit some tail-calling opportunities. */
6039 enum arm_pcs pcs_variant
;
6043 const_tree fndecl
= NULL_TREE
;
6045 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6048 fntype
= TREE_TYPE (fntype
);
6051 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6054 pcs_variant
= arm_pcs_default
;
6056 if (pcs_variant
!= ARM_PCS_AAPCS
)
6060 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6061 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6070 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6073 /* We aren't passed a decl, so we can't check that a call is local.
6074 However, it isn't clear that that would be a win anyway, since it
6075 might limit some tail-calling opportunities. */
6076 enum arm_pcs pcs_variant
;
6077 int unsignedp ATTRIBUTE_UNUSED
;
6081 const_tree fndecl
= NULL_TREE
;
6083 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6086 fntype
= TREE_TYPE (fntype
);
6089 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6092 pcs_variant
= arm_pcs_default
;
6094 /* Promote integer types. */
6095 if (type
&& INTEGRAL_TYPE_P (type
))
6096 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6098 if (pcs_variant
!= ARM_PCS_AAPCS
)
6102 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6103 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6105 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6109 /* Promotes small structs returned in a register to full-word size
6110 for big-endian AAPCS. */
6111 if (type
&& arm_return_in_msb (type
))
6113 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6114 if (size
% UNITS_PER_WORD
!= 0)
6116 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6117 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
6121 return gen_rtx_REG (mode
, R0_REGNUM
);
6125 aapcs_libcall_value (machine_mode mode
)
6127 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6128 && GET_MODE_SIZE (mode
) <= 4)
6131 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6134 /* Lay out a function argument using the AAPCS rules. The rule
6135 numbers referred to here are those in the AAPCS. */
6137 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6138 const_tree type
, bool named
)
6143 /* We only need to do this once per argument. */
6144 if (pcum
->aapcs_arg_processed
)
6147 pcum
->aapcs_arg_processed
= true;
6149 /* Special case: if named is false then we are handling an incoming
6150 anonymous argument which is on the stack. */
6154 /* Is this a potential co-processor register candidate? */
6155 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6157 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6158 pcum
->aapcs_cprc_slot
= slot
;
6160 /* We don't have to apply any of the rules from part B of the
6161 preparation phase, these are handled elsewhere in the
6166 /* A Co-processor register candidate goes either in its own
6167 class of registers or on the stack. */
6168 if (!pcum
->aapcs_cprc_failed
[slot
])
6170 /* C1.cp - Try to allocate the argument to co-processor
6172 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6175 /* C2.cp - Put the argument on the stack and note that we
6176 can't assign any more candidates in this slot. We also
6177 need to note that we have allocated stack space, so that
6178 we won't later try to split a non-cprc candidate between
6179 core registers and the stack. */
6180 pcum
->aapcs_cprc_failed
[slot
] = true;
6181 pcum
->can_split
= false;
6184 /* We didn't get a register, so this argument goes on the
6186 gcc_assert (pcum
->can_split
== false);
6191 /* C3 - For double-word aligned arguments, round the NCRN up to the
6192 next even number. */
6193 ncrn
= pcum
->aapcs_ncrn
;
6194 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
6197 nregs
= ARM_NUM_REGS2(mode
, type
);
6199 /* Sigh, this test should really assert that nregs > 0, but a GCC
6200 extension allows empty structs and then gives them empty size; it
6201 then allows such a structure to be passed by value. For some of
6202 the code below we have to pretend that such an argument has
6203 non-zero size so that we 'locate' it correctly either in
6204 registers or on the stack. */
6205 gcc_assert (nregs
>= 0);
6207 nregs2
= nregs
? nregs
: 1;
6209 /* C4 - Argument fits entirely in core registers. */
6210 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6212 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6213 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6217 /* C5 - Some core registers left and there are no arguments already
6218 on the stack: split this argument between the remaining core
6219 registers and the stack. */
6220 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6222 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6223 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6224 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6228 /* C6 - NCRN is set to 4. */
6229 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6231 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6235 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6236 for a call to a function whose data type is FNTYPE.
6237 For a library call, FNTYPE is NULL. */
6239 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6241 tree fndecl ATTRIBUTE_UNUSED
)
6243 /* Long call handling. */
6245 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6247 pcum
->pcs_variant
= arm_pcs_default
;
6249 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6251 if (arm_libcall_uses_aapcs_base (libname
))
6252 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6254 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6255 pcum
->aapcs_reg
= NULL_RTX
;
6256 pcum
->aapcs_partial
= 0;
6257 pcum
->aapcs_arg_processed
= false;
6258 pcum
->aapcs_cprc_slot
= -1;
6259 pcum
->can_split
= true;
6261 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6265 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6267 pcum
->aapcs_cprc_failed
[i
] = false;
6268 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6276 /* On the ARM, the offset starts at 0. */
6278 pcum
->iwmmxt_nregs
= 0;
6279 pcum
->can_split
= true;
6281 /* Varargs vectors are treated the same as long long.
6282 named_count avoids having to change the way arm handles 'named' */
6283 pcum
->named_count
= 0;
6286 if (TARGET_REALLY_IWMMXT
&& fntype
)
6290 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6292 fn_arg
= TREE_CHAIN (fn_arg
))
6293 pcum
->named_count
+= 1;
6295 if (! pcum
->named_count
)
6296 pcum
->named_count
= INT_MAX
;
6300 /* Return true if mode/type need doubleword alignment. */
6302 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6305 return PARM_BOUNDARY
< GET_MODE_ALIGNMENT (mode
);
6307 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6308 if (!AGGREGATE_TYPE_P (type
))
6309 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6311 /* Array types: Use member alignment of element type. */
6312 if (TREE_CODE (type
) == ARRAY_TYPE
)
6313 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6315 /* Record/aggregate types: Use greatest member alignment of any member. */
6316 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6317 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6324 /* Determine where to put an argument to a function.
6325 Value is zero to push the argument on the stack,
6326 or a hard register in which to store the argument.
6328 MODE is the argument's machine mode.
6329 TYPE is the data type of the argument (as a tree).
6330 This is null for libcalls where that information may
6332 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6333 the preceding args and about the function being called.
6334 NAMED is nonzero if this argument is a named parameter
6335 (otherwise it is an extra parameter matching an ellipsis).
6337 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6338 other arguments are passed on the stack. If (NAMED == 0) (which happens
6339 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6340 defined), say it is passed in the stack (function_prologue will
6341 indeed make it pass in the stack if necessary). */
6344 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6345 const_tree type
, bool named
)
6347 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6350 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6351 a call insn (op3 of a call_value insn). */
6352 if (mode
== VOIDmode
)
6355 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6357 aapcs_layout_arg (pcum
, mode
, type
, named
);
6358 return pcum
->aapcs_reg
;
6361 /* Varargs vectors are treated the same as long long.
6362 named_count avoids having to change the way arm handles 'named' */
6363 if (TARGET_IWMMXT_ABI
6364 && arm_vector_mode_supported_p (mode
)
6365 && pcum
->named_count
> pcum
->nargs
+ 1)
6367 if (pcum
->iwmmxt_nregs
<= 9)
6368 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6371 pcum
->can_split
= false;
6376 /* Put doubleword aligned quantities in even register pairs. */
6378 && ARM_DOUBLEWORD_ALIGN
6379 && arm_needs_doubleword_align (mode
, type
))
6382 /* Only allow splitting an arg between regs and memory if all preceding
6383 args were allocated to regs. For args passed by reference we only count
6384 the reference pointer. */
6385 if (pcum
->can_split
)
6388 nregs
= ARM_NUM_REGS2 (mode
, type
);
6390 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6393 return gen_rtx_REG (mode
, pcum
->nregs
);
6397 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6399 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
6400 ? DOUBLEWORD_ALIGNMENT
6405 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6406 tree type
, bool named
)
6408 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6409 int nregs
= pcum
->nregs
;
6411 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6413 aapcs_layout_arg (pcum
, mode
, type
, named
);
6414 return pcum
->aapcs_partial
;
6417 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6420 if (NUM_ARG_REGS
> nregs
6421 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6423 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6428 /* Update the data in PCUM to advance over an argument
6429 of mode MODE and data type TYPE.
6430 (TYPE is null for libcalls where that information may not be available.) */
6433 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6434 const_tree type
, bool named
)
6436 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6438 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6440 aapcs_layout_arg (pcum
, mode
, type
, named
);
6442 if (pcum
->aapcs_cprc_slot
>= 0)
6444 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6446 pcum
->aapcs_cprc_slot
= -1;
6449 /* Generic stuff. */
6450 pcum
->aapcs_arg_processed
= false;
6451 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6452 pcum
->aapcs_reg
= NULL_RTX
;
6453 pcum
->aapcs_partial
= 0;
6458 if (arm_vector_mode_supported_p (mode
)
6459 && pcum
->named_count
> pcum
->nargs
6460 && TARGET_IWMMXT_ABI
)
6461 pcum
->iwmmxt_nregs
+= 1;
6463 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6467 /* Variable sized types are passed by reference. This is a GCC
6468 extension to the ARM ABI. */
6471 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6472 machine_mode mode ATTRIBUTE_UNUSED
,
6473 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6475 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6478 /* Encode the current state of the #pragma [no_]long_calls. */
6481 OFF
, /* No #pragma [no_]long_calls is in effect. */
6482 LONG
, /* #pragma long_calls is in effect. */
6483 SHORT
/* #pragma no_long_calls is in effect. */
6486 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6489 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6491 arm_pragma_long_calls
= LONG
;
6495 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6497 arm_pragma_long_calls
= SHORT
;
6501 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6503 arm_pragma_long_calls
= OFF
;
6506 /* Handle an attribute requiring a FUNCTION_DECL;
6507 arguments as in struct attribute_spec.handler. */
6509 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6510 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6512 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6514 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6516 *no_add_attrs
= true;
6522 /* Handle an "interrupt" or "isr" attribute;
6523 arguments as in struct attribute_spec.handler. */
6525 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6530 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6532 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6534 *no_add_attrs
= true;
6536 /* FIXME: the argument if any is checked for type attributes;
6537 should it be checked for decl ones? */
6541 if (TREE_CODE (*node
) == FUNCTION_TYPE
6542 || TREE_CODE (*node
) == METHOD_TYPE
)
6544 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6546 warning (OPT_Wattributes
, "%qE attribute ignored",
6548 *no_add_attrs
= true;
6551 else if (TREE_CODE (*node
) == POINTER_TYPE
6552 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6553 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6554 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6556 *node
= build_variant_type_copy (*node
);
6557 TREE_TYPE (*node
) = build_type_attribute_variant
6559 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6560 *no_add_attrs
= true;
6564 /* Possibly pass this attribute on from the type to a decl. */
6565 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6566 | (int) ATTR_FLAG_FUNCTION_NEXT
6567 | (int) ATTR_FLAG_ARRAY_NEXT
))
6569 *no_add_attrs
= true;
6570 return tree_cons (name
, args
, NULL_TREE
);
6574 warning (OPT_Wattributes
, "%qE attribute ignored",
6583 /* Handle a "pcs" attribute; arguments as in struct
6584 attribute_spec.handler. */
6586 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6587 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6589 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6591 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6592 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
6622 /* Return 0 if the attributes for two types are incompatible, 1 if they
6623 are compatible, and 2 if they are nearly compatible (which causes a
6624 warning to be generated). */
6626 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6630 /* Check for mismatch of non-default calling convention. */
6631 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6634 /* Check for mismatched call attributes. */
6635 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6636 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6637 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6638 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6640 /* Only bother to check if an attribute is defined. */
6641 if (l1
| l2
| s1
| s2
)
6643 /* If one type has an attribute, the other must have the same attribute. */
6644 if ((l1
!= l2
) || (s1
!= s2
))
6647 /* Disallow mixed attributes. */
6648 if ((l1
& s2
) || (l2
& s1
))
6652 /* Check for mismatched ISR attribute. */
6653 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6655 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6656 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6658 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6665 /* Assigns default attributes to newly defined type. This is used to
6666 set short_call/long_call attributes for function types of
6667 functions defined inside corresponding #pragma scopes. */
6669 arm_set_default_type_attributes (tree type
)
6671 /* Add __attribute__ ((long_call)) to all functions, when
6672 inside #pragma long_calls or __attribute__ ((short_call)),
6673 when inside #pragma no_long_calls. */
6674 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6676 tree type_attr_list
, attr_name
;
6677 type_attr_list
= TYPE_ATTRIBUTES (type
);
6679 if (arm_pragma_long_calls
== LONG
)
6680 attr_name
= get_identifier ("long_call");
6681 else if (arm_pragma_long_calls
== SHORT
)
6682 attr_name
= get_identifier ("short_call");
6686 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6687 TYPE_ATTRIBUTES (type
) = type_attr_list
;
6691 /* Return true if DECL is known to be linked into section SECTION. */
6694 arm_function_in_section_p (tree decl
, section
*section
)
6696 /* We can only be certain about the prevailing symbol definition. */
6697 if (!decl_binds_to_current_def_p (decl
))
6700 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6701 if (!DECL_SECTION_NAME (decl
))
6703 /* Make sure that we will not create a unique section for DECL. */
6704 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
6708 return function_section (decl
) == section
;
6711 /* Return nonzero if a 32-bit "long_call" should be generated for
6712 a call from the current function to DECL. We generate a long_call
6715 a. has an __attribute__((long call))
6716 or b. is within the scope of a #pragma long_calls
6717 or c. the -mlong-calls command line switch has been specified
6719 However we do not generate a long call if the function:
6721 d. has an __attribute__ ((short_call))
6722 or e. is inside the scope of a #pragma no_long_calls
6723 or f. is defined in the same section as the current function. */
6726 arm_is_long_call_p (tree decl
)
6731 return TARGET_LONG_CALLS
;
6733 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6734 if (lookup_attribute ("short_call", attrs
))
6737 /* For "f", be conservative, and only cater for cases in which the
6738 whole of the current function is placed in the same section. */
6739 if (!flag_reorder_blocks_and_partition
6740 && TREE_CODE (decl
) == FUNCTION_DECL
6741 && arm_function_in_section_p (decl
, current_function_section ()))
6744 if (lookup_attribute ("long_call", attrs
))
6747 return TARGET_LONG_CALLS
;
6750 /* Return nonzero if it is ok to make a tail-call to DECL. */
6752 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6754 unsigned long func_type
;
6756 if (cfun
->machine
->sibcall_blocked
)
6759 /* Never tailcall something if we are generating code for Thumb-1. */
6763 /* The PIC register is live on entry to VxWorks PLT entries, so we
6764 must make the call before restoring the PIC register. */
6765 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
6768 /* If we are interworking and the function is not declared static
6769 then we can't tail-call it unless we know that it exists in this
6770 compilation unit (since it might be a Thumb routine). */
6771 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6772 && !TREE_ASM_WRITTEN (decl
))
6775 func_type
= arm_current_func_type ();
6776 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6777 if (IS_INTERRUPT (func_type
))
6780 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6782 /* Check that the return value locations are the same. For
6783 example that we aren't returning a value from the sibling in
6784 a VFP register but then need to transfer it to a core
6787 tree decl_or_type
= decl
;
6789 /* If it is an indirect function pointer, get the function type. */
6791 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
6793 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
6794 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6796 if (!rtx_equal_p (a
, b
))
6800 /* Never tailcall if function may be called with a misaligned SP. */
6801 if (IS_STACKALIGN (func_type
))
6804 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6805 references should become a NOP. Don't convert such calls into
6807 if (TARGET_AAPCS_BASED
6808 && arm_abi
== ARM_ABI_AAPCS
6810 && DECL_WEAK (decl
))
6813 /* Everything else is ok. */
6818 /* Addressing mode support functions. */
6820 /* Return nonzero if X is a legitimate immediate operand when compiling
6821 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6823 legitimate_pic_operand_p (rtx x
)
6825 if (GET_CODE (x
) == SYMBOL_REF
6826 || (GET_CODE (x
) == CONST
6827 && GET_CODE (XEXP (x
, 0)) == PLUS
6828 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6834 /* Record that the current function needs a PIC register. Initialize
6835 cfun->machine->pic_reg if we have not already done so. */
6838 require_pic_register (void)
6840 /* A lot of the logic here is made obscure by the fact that this
6841 routine gets called as part of the rtx cost estimation process.
6842 We don't want those calls to affect any assumptions about the real
6843 function; and further, we can't call entry_of_function() until we
6844 start the real expansion process. */
6845 if (!crtl
->uses_pic_offset_table
)
6847 gcc_assert (can_create_pseudo_p ());
6848 if (arm_pic_register
!= INVALID_REGNUM
6849 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6851 if (!cfun
->machine
->pic_reg
)
6852 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6854 /* Play games to avoid marking the function as needing pic
6855 if we are being called as part of the cost-estimation
6857 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6858 crtl
->uses_pic_offset_table
= 1;
6862 rtx_insn
*seq
, *insn
;
6864 if (!cfun
->machine
->pic_reg
)
6865 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6867 /* Play games to avoid marking the function as needing pic
6868 if we are being called as part of the cost-estimation
6870 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6872 crtl
->uses_pic_offset_table
= 1;
6875 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6876 && arm_pic_register
> LAST_LO_REGNUM
)
6877 emit_move_insn (cfun
->machine
->pic_reg
,
6878 gen_rtx_REG (Pmode
, arm_pic_register
));
6880 arm_load_pic_register (0UL);
6885 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6887 INSN_LOCATION (insn
) = prologue_location
;
6889 /* We can be called during expansion of PHI nodes, where
6890 we can't yet emit instructions directly in the final
6891 insn stream. Queue the insns on the entry edge, they will
6892 be committed after everything else is expanded. */
6893 insert_insn_on_edge (seq
,
6894 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6901 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
6903 if (GET_CODE (orig
) == SYMBOL_REF
6904 || GET_CODE (orig
) == LABEL_REF
)
6910 gcc_assert (can_create_pseudo_p ());
6911 reg
= gen_reg_rtx (Pmode
);
6914 /* VxWorks does not impose a fixed gap between segments; the run-time
6915 gap can be different from the object-file gap. We therefore can't
6916 use GOTOFF unless we are absolutely sure that the symbol is in the
6917 same segment as the GOT. Unfortunately, the flexibility of linker
6918 scripts means that we can't be sure of that in general, so assume
6919 that GOTOFF is never valid on VxWorks. */
6920 if ((GET_CODE (orig
) == LABEL_REF
6921 || (GET_CODE (orig
) == SYMBOL_REF
&&
6922 SYMBOL_REF_LOCAL_P (orig
)))
6924 && arm_pic_data_is_text_relative
)
6925 insn
= arm_pic_static_addr (orig
, reg
);
6931 /* If this function doesn't have a pic register, create one now. */
6932 require_pic_register ();
6934 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6936 /* Make the MEM as close to a constant as possible. */
6937 mem
= SET_SRC (pat
);
6938 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6939 MEM_READONLY_P (mem
) = 1;
6940 MEM_NOTRAP_P (mem
) = 1;
6942 insn
= emit_insn (pat
);
6945 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6947 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6951 else if (GET_CODE (orig
) == CONST
)
6955 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6956 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6959 /* Handle the case where we have: const (UNSPEC_TLS). */
6960 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6961 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6964 /* Handle the case where we have:
6965 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6967 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6968 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6969 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6971 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6977 gcc_assert (can_create_pseudo_p ());
6978 reg
= gen_reg_rtx (Pmode
);
6981 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6983 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6984 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6985 base
== reg
? 0 : reg
);
6987 if (CONST_INT_P (offset
))
6989 /* The base register doesn't really matter, we only want to
6990 test the index for the appropriate mode. */
6991 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6993 gcc_assert (can_create_pseudo_p ());
6994 offset
= force_reg (Pmode
, offset
);
6997 if (CONST_INT_P (offset
))
6998 return plus_constant (Pmode
, base
, INTVAL (offset
));
7001 if (GET_MODE_SIZE (mode
) > 4
7002 && (GET_MODE_CLASS (mode
) == MODE_INT
7003 || TARGET_SOFT_FLOAT
))
7005 emit_insn (gen_addsi3 (reg
, base
, offset
));
7009 return gen_rtx_PLUS (Pmode
, base
, offset
);
7016 /* Find a spare register to use during the prolog of a function. */
7019 thumb_find_work_register (unsigned long pushed_regs_mask
)
7023 /* Check the argument registers first as these are call-used. The
7024 register allocation order means that sometimes r3 might be used
7025 but earlier argument registers might not, so check them all. */
7026 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7027 if (!df_regs_ever_live_p (reg
))
7030 /* Before going on to check the call-saved registers we can try a couple
7031 more ways of deducing that r3 is available. The first is when we are
7032 pushing anonymous arguments onto the stack and we have less than 4
7033 registers worth of fixed arguments(*). In this case r3 will be part of
7034 the variable argument list and so we can be sure that it will be
7035 pushed right at the start of the function. Hence it will be available
7036 for the rest of the prologue.
7037 (*): ie crtl->args.pretend_args_size is greater than 0. */
7038 if (cfun
->machine
->uses_anonymous_args
7039 && crtl
->args
.pretend_args_size
> 0)
7040 return LAST_ARG_REGNUM
;
7042 /* The other case is when we have fixed arguments but less than 4 registers
7043 worth. In this case r3 might be used in the body of the function, but
7044 it is not being used to convey an argument into the function. In theory
7045 we could just check crtl->args.size to see how many bytes are
7046 being passed in argument registers, but it seems that it is unreliable.
7047 Sometimes it will have the value 0 when in fact arguments are being
7048 passed. (See testcase execute/20021111-1.c for an example). So we also
7049 check the args_info.nregs field as well. The problem with this field is
7050 that it makes no allowances for arguments that are passed to the
7051 function but which are not used. Hence we could miss an opportunity
7052 when a function has an unused argument in r3. But it is better to be
7053 safe than to be sorry. */
7054 if (! cfun
->machine
->uses_anonymous_args
7055 && crtl
->args
.size
>= 0
7056 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7057 && (TARGET_AAPCS_BASED
7058 ? crtl
->args
.info
.aapcs_ncrn
< 4
7059 : crtl
->args
.info
.nregs
< 4))
7060 return LAST_ARG_REGNUM
;
7062 /* Otherwise look for a call-saved register that is going to be pushed. */
7063 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7064 if (pushed_regs_mask
& (1 << reg
))
7069 /* Thumb-2 can use high regs. */
7070 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7071 if (pushed_regs_mask
& (1 << reg
))
7074 /* Something went wrong - thumb_compute_save_reg_mask()
7075 should have arranged for a suitable register to be pushed. */
7079 static GTY(()) int pic_labelno
;
7081 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7085 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7087 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7089 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7092 gcc_assert (flag_pic
);
7094 pic_reg
= cfun
->machine
->pic_reg
;
7095 if (TARGET_VXWORKS_RTP
)
7097 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7098 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7099 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7101 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7103 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7104 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7108 /* We use an UNSPEC rather than a LABEL_REF because this label
7109 never appears in the code stream. */
7111 labelno
= GEN_INT (pic_labelno
++);
7112 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7113 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7115 /* On the ARM the PC register contains 'dot + 8' at the time of the
7116 addition, on the Thumb it is 'dot + 4'. */
7117 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7118 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7120 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7124 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7126 else /* TARGET_THUMB1 */
7128 if (arm_pic_register
!= INVALID_REGNUM
7129 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7131 /* We will have pushed the pic register, so we should always be
7132 able to find a work register. */
7133 pic_tmp
= gen_rtx_REG (SImode
,
7134 thumb_find_work_register (saved_regs
));
7135 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7136 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7137 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7139 else if (arm_pic_register
!= INVALID_REGNUM
7140 && arm_pic_register
> LAST_LO_REGNUM
7141 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7143 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7144 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7145 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7148 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7152 /* Need to emit this whether or not we obey regdecls,
7153 since setjmp/longjmp can cause life info to screw up. */
7157 /* Generate code to load the address of a static var when flag_pic is set. */
7159 arm_pic_static_addr (rtx orig
, rtx reg
)
7161 rtx l1
, labelno
, offset_rtx
, insn
;
7163 gcc_assert (flag_pic
);
7165 /* We use an UNSPEC rather than a LABEL_REF because this label
7166 never appears in the code stream. */
7167 labelno
= GEN_INT (pic_labelno
++);
7168 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7169 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7171 /* On the ARM the PC register contains 'dot + 8' at the time of the
7172 addition, on the Thumb it is 'dot + 4'. */
7173 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7174 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7175 UNSPEC_SYMBOL_OFFSET
);
7176 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7178 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7182 /* Return nonzero if X is valid as an ARM state addressing register. */
7184 arm_address_register_rtx_p (rtx x
, int strict_p
)
7194 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7196 return (regno
<= LAST_ARM_REGNUM
7197 || regno
>= FIRST_PSEUDO_REGISTER
7198 || regno
== FRAME_POINTER_REGNUM
7199 || regno
== ARG_POINTER_REGNUM
);
7202 /* Return TRUE if this rtx is the difference of a symbol and a label,
7203 and will reduce to a PC-relative relocation in the object file.
7204 Expressions like this can be left alone when generating PIC, rather
7205 than forced through the GOT. */
7207 pcrel_constant_p (rtx x
)
7209 if (GET_CODE (x
) == MINUS
)
7210 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7215 /* Return true if X will surely end up in an index register after next
7218 will_be_in_index_register (const_rtx x
)
7220 /* arm.md: calculate_pic_address will split this into a register. */
7221 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7224 /* Return nonzero if X is a valid ARM state address operand. */
7226 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7230 enum rtx_code code
= GET_CODE (x
);
7232 if (arm_address_register_rtx_p (x
, strict_p
))
7235 use_ldrd
= (TARGET_LDRD
7236 && (mode
== DImode
|| mode
== DFmode
));
7238 if (code
== POST_INC
|| code
== PRE_DEC
7239 || ((code
== PRE_INC
|| code
== POST_DEC
)
7240 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7241 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7243 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7244 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7245 && GET_CODE (XEXP (x
, 1)) == PLUS
7246 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7248 rtx addend
= XEXP (XEXP (x
, 1), 1);
7250 /* Don't allow ldrd post increment by register because it's hard
7251 to fixup invalid register choices. */
7253 && GET_CODE (x
) == POST_MODIFY
7257 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7258 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7261 /* After reload constants split into minipools will have addresses
7262 from a LABEL_REF. */
7263 else if (reload_completed
7264 && (code
== LABEL_REF
7266 && GET_CODE (XEXP (x
, 0)) == PLUS
7267 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7268 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7271 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7274 else if (code
== PLUS
)
7276 rtx xop0
= XEXP (x
, 0);
7277 rtx xop1
= XEXP (x
, 1);
7279 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7280 && ((CONST_INT_P (xop1
)
7281 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7282 || (!strict_p
&& will_be_in_index_register (xop1
))))
7283 || (arm_address_register_rtx_p (xop1
, strict_p
)
7284 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7288 /* Reload currently can't handle MINUS, so disable this for now */
7289 else if (GET_CODE (x
) == MINUS
)
7291 rtx xop0
= XEXP (x
, 0);
7292 rtx xop1
= XEXP (x
, 1);
7294 return (arm_address_register_rtx_p (xop0
, strict_p
)
7295 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7299 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7300 && code
== SYMBOL_REF
7301 && CONSTANT_POOL_ADDRESS_P (x
)
7303 && symbol_mentioned_p (get_pool_constant (x
))
7304 && ! pcrel_constant_p (get_pool_constant (x
))))
7310 /* Return nonzero if X is a valid Thumb-2 address operand. */
7312 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7315 enum rtx_code code
= GET_CODE (x
);
7317 if (arm_address_register_rtx_p (x
, strict_p
))
7320 use_ldrd
= (TARGET_LDRD
7321 && (mode
== DImode
|| mode
== DFmode
));
7323 if (code
== POST_INC
|| code
== PRE_DEC
7324 || ((code
== PRE_INC
|| code
== POST_DEC
)
7325 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7326 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7328 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7329 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7330 && GET_CODE (XEXP (x
, 1)) == PLUS
7331 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7333 /* Thumb-2 only has autoincrement by constant. */
7334 rtx addend
= XEXP (XEXP (x
, 1), 1);
7335 HOST_WIDE_INT offset
;
7337 if (!CONST_INT_P (addend
))
7340 offset
= INTVAL(addend
);
7341 if (GET_MODE_SIZE (mode
) <= 4)
7342 return (offset
> -256 && offset
< 256);
7344 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7345 && (offset
& 3) == 0);
7348 /* After reload constants split into minipools will have addresses
7349 from a LABEL_REF. */
7350 else if (reload_completed
7351 && (code
== LABEL_REF
7353 && GET_CODE (XEXP (x
, 0)) == PLUS
7354 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7355 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7358 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7361 else if (code
== PLUS
)
7363 rtx xop0
= XEXP (x
, 0);
7364 rtx xop1
= XEXP (x
, 1);
7366 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7367 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7368 || (!strict_p
&& will_be_in_index_register (xop1
))))
7369 || (arm_address_register_rtx_p (xop1
, strict_p
)
7370 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7373 /* Normally we can assign constant values to target registers without
7374 the help of constant pool. But there are cases we have to use constant
7376 1) assign a label to register.
7377 2) sign-extend a 8bit value to 32bit and then assign to register.
7379 Constant pool access in format:
7380 (set (reg r0) (mem (symbol_ref (".LC0"))))
7381 will cause the use of literal pool (later in function arm_reorg).
7382 So here we mark such format as an invalid format, then the compiler
7383 will adjust it into:
7384 (set (reg r0) (symbol_ref (".LC0")))
7385 (set (reg r0) (mem (reg r0))).
7386 No extra register is required, and (mem (reg r0)) won't cause the use
7387 of literal pools. */
7388 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
7389 && CONSTANT_POOL_ADDRESS_P (x
))
7392 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7393 && code
== SYMBOL_REF
7394 && CONSTANT_POOL_ADDRESS_P (x
)
7396 && symbol_mentioned_p (get_pool_constant (x
))
7397 && ! pcrel_constant_p (get_pool_constant (x
))))
7403 /* Return nonzero if INDEX is valid for an address index operand in
7406 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7409 HOST_WIDE_INT range
;
7410 enum rtx_code code
= GET_CODE (index
);
7412 /* Standard coprocessor addressing modes. */
7413 if (TARGET_HARD_FLOAT
7414 && (mode
== SFmode
|| mode
== DFmode
))
7415 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7416 && INTVAL (index
) > -1024
7417 && (INTVAL (index
) & 3) == 0);
7419 /* For quad modes, we restrict the constant offset to be slightly less
7420 than what the instruction format permits. We do this because for
7421 quad mode moves, we will actually decompose them into two separate
7422 double-mode reads or writes. INDEX must therefore be a valid
7423 (double-mode) offset and so should INDEX+8. */
7424 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7425 return (code
== CONST_INT
7426 && INTVAL (index
) < 1016
7427 && INTVAL (index
) > -1024
7428 && (INTVAL (index
) & 3) == 0);
7430 /* We have no such constraint on double mode offsets, so we permit the
7431 full range of the instruction format. */
7432 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7433 return (code
== CONST_INT
7434 && INTVAL (index
) < 1024
7435 && INTVAL (index
) > -1024
7436 && (INTVAL (index
) & 3) == 0);
7438 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7439 return (code
== CONST_INT
7440 && INTVAL (index
) < 1024
7441 && INTVAL (index
) > -1024
7442 && (INTVAL (index
) & 3) == 0);
7444 if (arm_address_register_rtx_p (index
, strict_p
)
7445 && (GET_MODE_SIZE (mode
) <= 4))
7448 if (mode
== DImode
|| mode
== DFmode
)
7450 if (code
== CONST_INT
)
7452 HOST_WIDE_INT val
= INTVAL (index
);
7455 return val
> -256 && val
< 256;
7457 return val
> -4096 && val
< 4092;
7460 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7463 if (GET_MODE_SIZE (mode
) <= 4
7467 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7471 rtx xiop0
= XEXP (index
, 0);
7472 rtx xiop1
= XEXP (index
, 1);
7474 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7475 && power_of_two_operand (xiop1
, SImode
))
7476 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7477 && power_of_two_operand (xiop0
, SImode
)));
7479 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7480 || code
== ASHIFT
|| code
== ROTATERT
)
7482 rtx op
= XEXP (index
, 1);
7484 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7487 && INTVAL (op
) <= 31);
7491 /* For ARM v4 we may be doing a sign-extend operation during the
7497 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7503 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7505 return (code
== CONST_INT
7506 && INTVAL (index
) < range
7507 && INTVAL (index
) > -range
);
7510 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7511 index operand. i.e. 1, 2, 4 or 8. */
7513 thumb2_index_mul_operand (rtx op
)
7517 if (!CONST_INT_P (op
))
7521 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7524 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7526 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7528 enum rtx_code code
= GET_CODE (index
);
7530 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7531 /* Standard coprocessor addressing modes. */
7532 if (TARGET_HARD_FLOAT
7533 && (mode
== SFmode
|| mode
== DFmode
))
7534 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7535 /* Thumb-2 allows only > -256 index range for it's core register
7536 load/stores. Since we allow SF/DF in core registers, we have
7537 to use the intersection between -256~4096 (core) and -1024~1024
7539 && INTVAL (index
) > -256
7540 && (INTVAL (index
) & 3) == 0);
7542 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7544 /* For DImode assume values will usually live in core regs
7545 and only allow LDRD addressing modes. */
7546 if (!TARGET_LDRD
|| mode
!= DImode
)
7547 return (code
== CONST_INT
7548 && INTVAL (index
) < 1024
7549 && INTVAL (index
) > -1024
7550 && (INTVAL (index
) & 3) == 0);
7553 /* For quad modes, we restrict the constant offset to be slightly less
7554 than what the instruction format permits. We do this because for
7555 quad mode moves, we will actually decompose them into two separate
7556 double-mode reads or writes. INDEX must therefore be a valid
7557 (double-mode) offset and so should INDEX+8. */
7558 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7559 return (code
== CONST_INT
7560 && INTVAL (index
) < 1016
7561 && INTVAL (index
) > -1024
7562 && (INTVAL (index
) & 3) == 0);
7564 /* We have no such constraint on double mode offsets, so we permit the
7565 full range of the instruction format. */
7566 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7567 return (code
== CONST_INT
7568 && INTVAL (index
) < 1024
7569 && INTVAL (index
) > -1024
7570 && (INTVAL (index
) & 3) == 0);
7572 if (arm_address_register_rtx_p (index
, strict_p
)
7573 && (GET_MODE_SIZE (mode
) <= 4))
7576 if (mode
== DImode
|| mode
== DFmode
)
7578 if (code
== CONST_INT
)
7580 HOST_WIDE_INT val
= INTVAL (index
);
7581 /* ??? Can we assume ldrd for thumb2? */
7582 /* Thumb-2 ldrd only has reg+const addressing modes. */
7583 /* ldrd supports offsets of +-1020.
7584 However the ldr fallback does not. */
7585 return val
> -256 && val
< 256 && (val
& 3) == 0;
7593 rtx xiop0
= XEXP (index
, 0);
7594 rtx xiop1
= XEXP (index
, 1);
7596 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7597 && thumb2_index_mul_operand (xiop1
))
7598 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7599 && thumb2_index_mul_operand (xiop0
)));
7601 else if (code
== ASHIFT
)
7603 rtx op
= XEXP (index
, 1);
7605 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7608 && INTVAL (op
) <= 3);
7611 return (code
== CONST_INT
7612 && INTVAL (index
) < 4096
7613 && INTVAL (index
) > -256);
7616 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7618 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
7628 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7630 return (regno
<= LAST_LO_REGNUM
7631 || regno
> LAST_VIRTUAL_REGISTER
7632 || regno
== FRAME_POINTER_REGNUM
7633 || (GET_MODE_SIZE (mode
) >= 4
7634 && (regno
== STACK_POINTER_REGNUM
7635 || regno
>= FIRST_PSEUDO_REGISTER
7636 || x
== hard_frame_pointer_rtx
7637 || x
== arg_pointer_rtx
)));
7640 /* Return nonzero if x is a legitimate index register. This is the case
7641 for any base register that can access a QImode object. */
7643 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
7645 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
7648 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7650 The AP may be eliminated to either the SP or the FP, so we use the
7651 least common denominator, e.g. SImode, and offsets from 0 to 64.
7653 ??? Verify whether the above is the right approach.
7655 ??? Also, the FP may be eliminated to the SP, so perhaps that
7656 needs special handling also.
7658 ??? Look at how the mips16 port solves this problem. It probably uses
7659 better ways to solve some of these problems.
7661 Although it is not incorrect, we don't accept QImode and HImode
7662 addresses based on the frame pointer or arg pointer until the
7663 reload pass starts. This is so that eliminating such addresses
7664 into stack based ones won't produce impossible code. */
7666 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7668 /* ??? Not clear if this is right. Experiment. */
7669 if (GET_MODE_SIZE (mode
) < 4
7670 && !(reload_in_progress
|| reload_completed
)
7671 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7672 || reg_mentioned_p (arg_pointer_rtx
, x
)
7673 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7674 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7675 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7676 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7679 /* Accept any base register. SP only in SImode or larger. */
7680 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7683 /* This is PC relative data before arm_reorg runs. */
7684 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7685 && GET_CODE (x
) == SYMBOL_REF
7686 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7689 /* This is PC relative data after arm_reorg runs. */
7690 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7692 && (GET_CODE (x
) == LABEL_REF
7693 || (GET_CODE (x
) == CONST
7694 && GET_CODE (XEXP (x
, 0)) == PLUS
7695 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7696 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7699 /* Post-inc indexing only supported for SImode and larger. */
7700 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7701 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7704 else if (GET_CODE (x
) == PLUS
)
7706 /* REG+REG address can be any two index registers. */
7707 /* We disallow FRAME+REG addressing since we know that FRAME
7708 will be replaced with STACK, and SP relative addressing only
7709 permits SP+OFFSET. */
7710 if (GET_MODE_SIZE (mode
) <= 4
7711 && XEXP (x
, 0) != frame_pointer_rtx
7712 && XEXP (x
, 1) != frame_pointer_rtx
7713 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7714 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7715 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7718 /* REG+const has 5-7 bit offset for non-SP registers. */
7719 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7720 || XEXP (x
, 0) == arg_pointer_rtx
)
7721 && CONST_INT_P (XEXP (x
, 1))
7722 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7725 /* REG+const has 10-bit offset for SP, but only SImode and
7726 larger is supported. */
7727 /* ??? Should probably check for DI/DFmode overflow here
7728 just like GO_IF_LEGITIMATE_OFFSET does. */
7729 else if (REG_P (XEXP (x
, 0))
7730 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7731 && GET_MODE_SIZE (mode
) >= 4
7732 && CONST_INT_P (XEXP (x
, 1))
7733 && INTVAL (XEXP (x
, 1)) >= 0
7734 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7735 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7738 else if (REG_P (XEXP (x
, 0))
7739 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7740 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7741 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7742 && REGNO (XEXP (x
, 0))
7743 <= LAST_VIRTUAL_POINTER_REGISTER
))
7744 && GET_MODE_SIZE (mode
) >= 4
7745 && CONST_INT_P (XEXP (x
, 1))
7746 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7750 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7751 && GET_MODE_SIZE (mode
) == 4
7752 && GET_CODE (x
) == SYMBOL_REF
7753 && CONSTANT_POOL_ADDRESS_P (x
)
7755 && symbol_mentioned_p (get_pool_constant (x
))
7756 && ! pcrel_constant_p (get_pool_constant (x
))))
7762 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7763 instruction of mode MODE. */
7765 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
7767 switch (GET_MODE_SIZE (mode
))
7770 return val
>= 0 && val
< 32;
7773 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7777 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7783 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
7786 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7787 else if (TARGET_THUMB2
)
7788 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7789 else /* if (TARGET_THUMB1) */
7790 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7793 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7795 Given an rtx X being reloaded into a reg required to be
7796 in class CLASS, return the class of reg to actually use.
7797 In general this is just CLASS, but for the Thumb core registers and
7798 immediate constants we prefer a LO_REGS class or a subset. */
7801 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7807 if (rclass
== GENERAL_REGS
)
7814 /* Build the SYMBOL_REF for __tls_get_addr. */
7816 static GTY(()) rtx tls_get_addr_libfunc
;
7819 get_tls_get_addr (void)
7821 if (!tls_get_addr_libfunc
)
7822 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7823 return tls_get_addr_libfunc
;
7827 arm_load_tp (rtx target
)
7830 target
= gen_reg_rtx (SImode
);
7834 /* Can return in any reg. */
7835 emit_insn (gen_load_tp_hard (target
));
7839 /* Always returned in r0. Immediately copy the result into a pseudo,
7840 otherwise other uses of r0 (e.g. setting up function arguments) may
7841 clobber the value. */
7845 emit_insn (gen_load_tp_soft ());
7847 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
7848 emit_move_insn (target
, tmp
);
7854 load_tls_operand (rtx x
, rtx reg
)
7858 if (reg
== NULL_RTX
)
7859 reg
= gen_reg_rtx (SImode
);
7861 tmp
= gen_rtx_CONST (SImode
, x
);
7863 emit_move_insn (reg
, tmp
);
7869 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7871 rtx label
, labelno
, sum
;
7873 gcc_assert (reloc
!= TLS_DESCSEQ
);
7876 labelno
= GEN_INT (pic_labelno
++);
7877 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7878 label
= gen_rtx_CONST (VOIDmode
, label
);
7880 sum
= gen_rtx_UNSPEC (Pmode
,
7881 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7882 GEN_INT (TARGET_ARM
? 8 : 4)),
7884 reg
= load_tls_operand (sum
, reg
);
7887 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7889 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7891 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7892 LCT_PURE
, /* LCT_CONST? */
7893 Pmode
, 1, reg
, Pmode
);
7895 rtx_insn
*insns
= get_insns ();
7902 arm_tls_descseq_addr (rtx x
, rtx reg
)
7904 rtx labelno
= GEN_INT (pic_labelno
++);
7905 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7906 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7907 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7908 gen_rtx_CONST (VOIDmode
, label
),
7909 GEN_INT (!TARGET_ARM
)),
7911 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
7913 emit_insn (gen_tlscall (x
, labelno
));
7915 reg
= gen_reg_rtx (SImode
);
7917 gcc_assert (REGNO (reg
) != R0_REGNUM
);
7919 emit_move_insn (reg
, reg0
);
7925 legitimize_tls_address (rtx x
, rtx reg
)
7927 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
7929 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7933 case TLS_MODEL_GLOBAL_DYNAMIC
:
7934 if (TARGET_GNU2_TLS
)
7936 reg
= arm_tls_descseq_addr (x
, reg
);
7938 tp
= arm_load_tp (NULL_RTX
);
7940 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7944 /* Original scheme */
7945 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7946 dest
= gen_reg_rtx (Pmode
);
7947 emit_libcall_block (insns
, dest
, ret
, x
);
7951 case TLS_MODEL_LOCAL_DYNAMIC
:
7952 if (TARGET_GNU2_TLS
)
7954 reg
= arm_tls_descseq_addr (x
, reg
);
7956 tp
= arm_load_tp (NULL_RTX
);
7958 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7962 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7964 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7965 share the LDM result with other LD model accesses. */
7966 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7968 dest
= gen_reg_rtx (Pmode
);
7969 emit_libcall_block (insns
, dest
, ret
, eqv
);
7971 /* Load the addend. */
7972 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7973 GEN_INT (TLS_LDO32
)),
7975 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7976 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7980 case TLS_MODEL_INITIAL_EXEC
:
7981 labelno
= GEN_INT (pic_labelno
++);
7982 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7983 label
= gen_rtx_CONST (VOIDmode
, label
);
7984 sum
= gen_rtx_UNSPEC (Pmode
,
7985 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7986 GEN_INT (TARGET_ARM
? 8 : 4)),
7988 reg
= load_tls_operand (sum
, reg
);
7991 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7992 else if (TARGET_THUMB2
)
7993 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7996 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7997 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8000 tp
= arm_load_tp (NULL_RTX
);
8002 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8004 case TLS_MODEL_LOCAL_EXEC
:
8005 tp
= arm_load_tp (NULL_RTX
);
8007 reg
= gen_rtx_UNSPEC (Pmode
,
8008 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8010 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8012 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8019 /* Try machine-dependent ways of modifying an illegitimate address
8020 to be legitimate. If we find one, return the new, valid address. */
8022 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8024 if (arm_tls_referenced_p (x
))
8028 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8030 addend
= XEXP (XEXP (x
, 0), 1);
8031 x
= XEXP (XEXP (x
, 0), 0);
8034 if (GET_CODE (x
) != SYMBOL_REF
)
8037 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8039 x
= legitimize_tls_address (x
, NULL_RTX
);
8043 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8052 /* TODO: legitimize_address for Thumb2. */
8055 return thumb_legitimize_address (x
, orig_x
, mode
);
8058 if (GET_CODE (x
) == PLUS
)
8060 rtx xop0
= XEXP (x
, 0);
8061 rtx xop1
= XEXP (x
, 1);
8063 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8064 xop0
= force_reg (SImode
, xop0
);
8066 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8067 && !symbol_mentioned_p (xop1
))
8068 xop1
= force_reg (SImode
, xop1
);
8070 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8071 && CONST_INT_P (xop1
))
8073 HOST_WIDE_INT n
, low_n
;
8077 /* VFP addressing modes actually allow greater offsets, but for
8078 now we just stick with the lowest common denominator. */
8079 if (mode
== DImode
|| mode
== DFmode
)
8091 low_n
= ((mode
) == TImode
? 0
8092 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8096 base_reg
= gen_reg_rtx (SImode
);
8097 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8098 emit_move_insn (base_reg
, val
);
8099 x
= plus_constant (Pmode
, base_reg
, low_n
);
8101 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8102 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8105 /* XXX We don't allow MINUS any more -- see comment in
8106 arm_legitimate_address_outer_p (). */
8107 else if (GET_CODE (x
) == MINUS
)
8109 rtx xop0
= XEXP (x
, 0);
8110 rtx xop1
= XEXP (x
, 1);
8112 if (CONSTANT_P (xop0
))
8113 xop0
= force_reg (SImode
, xop0
);
8115 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8116 xop1
= force_reg (SImode
, xop1
);
8118 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8119 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8122 /* Make sure to take full advantage of the pre-indexed addressing mode
8123 with absolute addresses which often allows for the base register to
8124 be factorized for multiple adjacent memory references, and it might
8125 even allows for the mini pool to be avoided entirely. */
8126 else if (CONST_INT_P (x
) && optimize
> 0)
8129 HOST_WIDE_INT mask
, base
, index
;
8132 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8133 use a 8-bit index. So let's use a 12-bit index for SImode only and
8134 hope that arm_gen_constant will enable ldrb to use more bits. */
8135 bits
= (mode
== SImode
) ? 12 : 8;
8136 mask
= (1 << bits
) - 1;
8137 base
= INTVAL (x
) & ~mask
;
8138 index
= INTVAL (x
) & mask
;
8139 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8141 /* It'll most probably be more efficient to generate the base
8142 with more bits set and use a negative index instead. */
8146 base_reg
= force_reg (SImode
, GEN_INT (base
));
8147 x
= plus_constant (Pmode
, base_reg
, index
);
8152 /* We need to find and carefully transform any SYMBOL and LABEL
8153 references; so go back to the original address expression. */
8154 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8156 if (new_x
!= orig_x
)
8164 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8165 to be legitimate. If we find one, return the new, valid address. */
8167 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8169 if (GET_CODE (x
) == PLUS
8170 && CONST_INT_P (XEXP (x
, 1))
8171 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8172 || INTVAL (XEXP (x
, 1)) < 0))
8174 rtx xop0
= XEXP (x
, 0);
8175 rtx xop1
= XEXP (x
, 1);
8176 HOST_WIDE_INT offset
= INTVAL (xop1
);
8178 /* Try and fold the offset into a biasing of the base register and
8179 then offsetting that. Don't do this when optimizing for space
8180 since it can cause too many CSEs. */
8181 if (optimize_size
&& offset
>= 0
8182 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8184 HOST_WIDE_INT delta
;
8187 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8188 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8189 delta
= 31 * GET_MODE_SIZE (mode
);
8191 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8193 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8195 x
= plus_constant (Pmode
, xop0
, delta
);
8197 else if (offset
< 0 && offset
> -256)
8198 /* Small negative offsets are best done with a subtract before the
8199 dereference, forcing these into a register normally takes two
8201 x
= force_operand (x
, NULL_RTX
);
8204 /* For the remaining cases, force the constant into a register. */
8205 xop1
= force_reg (SImode
, xop1
);
8206 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8209 else if (GET_CODE (x
) == PLUS
8210 && s_register_operand (XEXP (x
, 1), SImode
)
8211 && !s_register_operand (XEXP (x
, 0), SImode
))
8213 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8215 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8220 /* We need to find and carefully transform any SYMBOL and LABEL
8221 references; so go back to the original address expression. */
8222 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8224 if (new_x
!= orig_x
)
8231 /* Return TRUE if X contains any TLS symbol references. */
8234 arm_tls_referenced_p (rtx x
)
8236 if (! TARGET_HAVE_TLS
)
8239 subrtx_iterator::array_type array
;
8240 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8242 const_rtx x
= *iter
;
8243 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8246 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8247 TLS offsets, not real symbol references. */
8248 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8249 iter
.skip_subrtxes ();
8254 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8256 On the ARM, allow any integer (invalid ones are removed later by insn
8257 patterns), nice doubles and symbol_refs which refer to the function's
8260 When generating pic allow anything. */
8263 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8265 return flag_pic
|| !label_mentioned_p (x
);
8269 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8271 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8272 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8273 for ARMv8-M Baseline or later the result is valid. */
8274 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8277 return (CONST_INT_P (x
)
8278 || CONST_DOUBLE_P (x
)
8279 || CONSTANT_ADDRESS_P (x
)
8284 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8286 return (!arm_cannot_force_const_mem (mode
, x
)
8288 ? arm_legitimate_constant_p_1 (mode
, x
)
8289 : thumb_legitimate_constant_p (mode
, x
)));
8292 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8295 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8299 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8301 split_const (x
, &base
, &offset
);
8302 if (GET_CODE (base
) == SYMBOL_REF
8303 && !offset_within_block_p (base
, INTVAL (offset
)))
8306 return arm_tls_referenced_p (x
);
/* True if X is a REG, or a SUBREG wrapping a REG.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* Strip any SUBREG from X and return the underlying REG.  */
#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))
8317 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8319 machine_mode mode
= GET_MODE (x
);
8328 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8335 return COSTS_N_INSNS (1);
8338 if (CONST_INT_P (XEXP (x
, 1)))
8341 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8348 return COSTS_N_INSNS (2) + cycles
;
8350 return COSTS_N_INSNS (1) + 16;
8353 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8355 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8356 return (COSTS_N_INSNS (words
)
8357 + 4 * ((MEM_P (SET_SRC (x
)))
8358 + MEM_P (SET_DEST (x
))));
8363 if (UINTVAL (x
) < 256
8364 /* 16-bit constant. */
8365 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8367 if (thumb_shiftable_const (INTVAL (x
)))
8368 return COSTS_N_INSNS (2);
8369 return COSTS_N_INSNS (3);
8371 else if ((outer
== PLUS
|| outer
== COMPARE
)
8372 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8374 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8375 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8376 return COSTS_N_INSNS (1);
8377 else if (outer
== AND
)
8380 /* This duplicates the tests in the andsi3 expander. */
8381 for (i
= 9; i
<= 31; i
++)
8382 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8383 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8384 return COSTS_N_INSNS (2);
8386 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8387 || outer
== LSHIFTRT
)
8389 return COSTS_N_INSNS (2);
8395 return COSTS_N_INSNS (3);
8413 /* XXX another guess. */
8414 /* Memory costs quite a lot for the first word, but subsequent words
8415 load at the equivalent of a single insn each. */
8416 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8417 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8422 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8428 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8429 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8435 return total
+ COSTS_N_INSNS (1);
8437 /* Assume a two-shift sequence. Increase the cost slightly so
8438 we prefer actual shifts over an extend operation. */
8439 return total
+ 1 + COSTS_N_INSNS (2);
8446 /* Estimates the size cost of thumb1 instructions.
8447 For now most of the code is copied from thumb1_rtx_costs. We need more
8448 fine grain tuning when we have more related test cases. */
8450 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8452 machine_mode mode
= GET_MODE (x
);
8461 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8465 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8466 defined by RTL expansion, especially for the expansion of
8468 if ((GET_CODE (XEXP (x
, 0)) == MULT
8469 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8470 || (GET_CODE (XEXP (x
, 1)) == MULT
8471 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8472 return COSTS_N_INSNS (2);
8477 return COSTS_N_INSNS (1);
8480 if (CONST_INT_P (XEXP (x
, 1)))
8482 /* Thumb1 mul instruction can't operate on const. We must Load it
8483 into a register first. */
8484 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8485 /* For the targets which have a very small and high-latency multiply
8486 unit, we prefer to synthesize the mult with up to 5 instructions,
8487 giving a good balance between size and performance. */
8488 if (arm_arch6m
&& arm_m_profile_small_mul
)
8489 return COSTS_N_INSNS (5);
8491 return COSTS_N_INSNS (1) + const_size
;
8493 return COSTS_N_INSNS (1);
8496 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8498 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8499 cost
= COSTS_N_INSNS (words
);
8500 if (satisfies_constraint_J (SET_SRC (x
))
8501 || satisfies_constraint_K (SET_SRC (x
))
8502 /* Too big an immediate for a 2-byte mov, using MOVT. */
8503 || (CONST_INT_P (SET_SRC (x
))
8504 && UINTVAL (SET_SRC (x
)) >= 256
8506 && satisfies_constraint_j (SET_SRC (x
)))
8507 /* thumb1_movdi_insn. */
8508 || ((words
> 1) && MEM_P (SET_SRC (x
))))
8509 cost
+= COSTS_N_INSNS (1);
8515 if (UINTVAL (x
) < 256)
8516 return COSTS_N_INSNS (1);
8517 /* movw is 4byte long. */
8518 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
8519 return COSTS_N_INSNS (2);
8520 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8521 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8522 return COSTS_N_INSNS (2);
8523 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8524 if (thumb_shiftable_const (INTVAL (x
)))
8525 return COSTS_N_INSNS (2);
8526 return COSTS_N_INSNS (3);
8528 else if ((outer
== PLUS
|| outer
== COMPARE
)
8529 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8531 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8532 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8533 return COSTS_N_INSNS (1);
8534 else if (outer
== AND
)
8537 /* This duplicates the tests in the andsi3 expander. */
8538 for (i
= 9; i
<= 31; i
++)
8539 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8540 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8541 return COSTS_N_INSNS (2);
8543 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8544 || outer
== LSHIFTRT
)
8546 return COSTS_N_INSNS (2);
8552 return COSTS_N_INSNS (3);
8566 return COSTS_N_INSNS (1);
8569 return (COSTS_N_INSNS (1)
8571 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8572 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8573 ? COSTS_N_INSNS (1) : 0));
8577 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8582 /* XXX still guessing. */
8583 switch (GET_MODE (XEXP (x
, 0)))
8586 return (1 + (mode
== DImode
? 4 : 0)
8587 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8590 return (4 + (mode
== DImode
? 4 : 0)
8591 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8594 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8605 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8606 operand, then return the operand that is being shifted. If the shift
8607 is not by a constant, then set SHIFT_REG to point to the operand.
8608 Return NULL if OP is not a shifter operand. */
8610 shifter_op_p (rtx op
, rtx
*shift_reg
)
8612 enum rtx_code code
= GET_CODE (op
);
8614 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
8615 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
8616 return XEXP (op
, 0);
8617 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
8618 return XEXP (op
, 0);
8619 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
8620 || code
== ASHIFTRT
)
8622 if (!CONST_INT_P (XEXP (op
, 1)))
8623 *shift_reg
= XEXP (op
, 1);
8624 return XEXP (op
, 0);
8631 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
8633 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
8634 rtx_code code
= GET_CODE (x
);
8635 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
8637 switch (XINT (x
, 1))
8639 case UNSPEC_UNALIGNED_LOAD
:
8640 /* We can only do unaligned loads into the integer unit, and we can't
8642 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
8644 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
8645 + extra_cost
->ldst
.load_unaligned
);
8648 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
8649 ADDR_SPACE_GENERIC
, speed_p
);
8653 case UNSPEC_UNALIGNED_STORE
:
8654 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
8656 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
8657 + extra_cost
->ldst
.store_unaligned
);
8659 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
8661 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
8662 ADDR_SPACE_GENERIC
, speed_p
);
8673 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
8677 *cost
= COSTS_N_INSNS (2);
8683 /* Cost of a libcall. We assume one insn per argument, an amount for the
8684 call (one insn for -Os) and then one for processing the result. */
8685 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
8687 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
8690 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
8691 if (shift_op != NULL \
8692 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
8697 *cost += extra_cost->alu.arith_shift_reg; \
8698 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
8699 ASHIFT, 1, speed_p); \
8702 *cost += extra_cost->alu.arith_shift; \
8704 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
8705 ASHIFT, 0, speed_p) \
8706 + rtx_cost (XEXP (x, 1 - IDX), \
8707 GET_MODE (shift_op), \
8714 /* RTX costs. Make an estimate of the cost of executing the operation
8715 X, which is contained with an operation with code OUTER_CODE.
8716 SPEED_P indicates whether the cost desired is the performance cost,
8717 or the size cost. The estimate is stored in COST and the return
8718 value is TRUE if the cost calculation is final, or FALSE if the
8719 caller should recurse through the operands of X to add additional
8722 We currently make no attempt to model the size savings of Thumb-2
8723 16-bit instructions. At the normal points in compilation where
8724 this code is called we have no measure of whether the condition
8725 flags are live or not, and thus no realistic way to determine what
8726 the size will eventually be. */
8728 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8729 const struct cpu_cost_table
*extra_cost
,
8730 int *cost
, bool speed_p
)
8732 machine_mode mode
= GET_MODE (x
);
8734 *cost
= COSTS_N_INSNS (1);
8739 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
8741 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
8749 /* SET RTXs don't have a mode so we get it from the destination. */
8750 mode
= GET_MODE (SET_DEST (x
));
8752 if (REG_P (SET_SRC (x
))
8753 && REG_P (SET_DEST (x
)))
8755 /* Assume that most copies can be done with a single insn,
8756 unless we don't have HW FP, in which case everything
8757 larger than word mode will require two insns. */
8758 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
8759 && GET_MODE_SIZE (mode
) > 4)
8762 /* Conditional register moves can be encoded
8763 in 16 bits in Thumb mode. */
8764 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
8770 if (CONST_INT_P (SET_SRC (x
)))
8772 /* Handle CONST_INT here, since the value doesn't have a mode
8773 and we would otherwise be unable to work out the true cost. */
8774 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
8777 /* Slightly lower the cost of setting a core reg to a constant.
8778 This helps break up chains and allows for better scheduling. */
8779 if (REG_P (SET_DEST (x
))
8780 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
8783 /* Immediate moves with an immediate in the range [0, 255] can be
8784 encoded in 16 bits in Thumb mode. */
8785 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
8786 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
8788 goto const_int_cost
;
8794 /* A memory access costs 1 insn if the mode is small, or the address is
8795 a single register, otherwise it costs one insn per word. */
8796 if (REG_P (XEXP (x
, 0)))
8797 *cost
= COSTS_N_INSNS (1);
8799 && GET_CODE (XEXP (x
, 0)) == PLUS
8800 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
8801 /* This will be split into two instructions.
8802 See arm.md:calculate_pic_address. */
8803 *cost
= COSTS_N_INSNS (2);
8805 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8807 /* For speed optimizations, add the costs of the address and
8808 accessing memory. */
8811 *cost
+= (extra_cost
->ldst
.load
8812 + arm_address_cost (XEXP (x
, 0), mode
,
8813 ADDR_SPACE_GENERIC
, speed_p
));
8815 *cost
+= extra_cost
->ldst
.load
;
8821 /* Calculations of LDM costs are complex. We assume an initial cost
8822 (ldm_1st) which will load the number of registers mentioned in
8823 ldm_regs_per_insn_1st registers; then each additional
8824 ldm_regs_per_insn_subsequent registers cost one more insn. The
8825 formula for N regs is thus:
8827 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
8828 + ldm_regs_per_insn_subsequent - 1)
8829 / ldm_regs_per_insn_subsequent).
8831 Additional costs may also be added for addressing. A similar
8832 formula is used for STM. */
8834 bool is_ldm
= load_multiple_operation (x
, SImode
);
8835 bool is_stm
= store_multiple_operation (x
, SImode
);
8837 if (is_ldm
|| is_stm
)
8841 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
8842 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
8843 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
8844 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
8845 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
8846 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
8847 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
8849 *cost
+= regs_per_insn_1st
8850 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
8851 + regs_per_insn_sub
- 1)
8852 / regs_per_insn_sub
);
8861 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8862 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8863 *cost
+= COSTS_N_INSNS (speed_p
8864 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
8865 else if (mode
== SImode
&& TARGET_IDIV
)
8866 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
8868 *cost
= LIBCALL_COST (2);
8869 return false; /* All arguments must be in registers. */
8872 /* MOD by a power of 2 can be expanded as:
8874 and r0, r0, #(n - 1)
8875 and r1, r1, #(n - 1)
8876 rsbpl r0, r1, #0. */
8877 if (CONST_INT_P (XEXP (x
, 1))
8878 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
8881 *cost
+= COSTS_N_INSNS (3);
8884 *cost
+= 2 * extra_cost
->alu
.logical
8885 + extra_cost
->alu
.arith
;
8891 *cost
= LIBCALL_COST (2);
8892 return false; /* All arguments must be in registers. */
8895 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
8897 *cost
+= (COSTS_N_INSNS (1)
8898 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
8900 *cost
+= extra_cost
->alu
.shift_reg
;
8908 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
8910 *cost
+= (COSTS_N_INSNS (2)
8911 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
8913 *cost
+= 2 * extra_cost
->alu
.shift
;
8916 else if (mode
== SImode
)
8918 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
8919 /* Slightly disparage register shifts at -Os, but not by much. */
8920 if (!CONST_INT_P (XEXP (x
, 1)))
8921 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
8922 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
8925 else if (GET_MODE_CLASS (mode
) == MODE_INT
8926 && GET_MODE_SIZE (mode
) < 4)
8930 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
8931 /* Slightly disparage register shifts at -Os, but not by
8933 if (!CONST_INT_P (XEXP (x
, 1)))
8934 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
8935 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
8937 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
8939 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
8941 /* Can use SBFX/UBFX. */
8943 *cost
+= extra_cost
->alu
.bfx
;
8944 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
8948 *cost
+= COSTS_N_INSNS (1);
8949 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
8952 if (CONST_INT_P (XEXP (x
, 1)))
8953 *cost
+= 2 * extra_cost
->alu
.shift
;
8955 *cost
+= (extra_cost
->alu
.shift
8956 + extra_cost
->alu
.shift_reg
);
8959 /* Slightly disparage register shifts. */
8960 *cost
+= !CONST_INT_P (XEXP (x
, 1));
8965 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
8966 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
8969 if (CONST_INT_P (XEXP (x
, 1)))
8970 *cost
+= (2 * extra_cost
->alu
.shift
8971 + extra_cost
->alu
.log_shift
);
8973 *cost
+= (extra_cost
->alu
.shift
8974 + extra_cost
->alu
.shift_reg
8975 + extra_cost
->alu
.log_shift_reg
);
8981 *cost
= LIBCALL_COST (2);
8990 *cost
+= extra_cost
->alu
.rev
;
8997 /* No rev instruction available. Look at arm_legacy_rev
8998 and thumb_legacy_rev for the form of RTL used then. */
9001 *cost
+= COSTS_N_INSNS (9);
9005 *cost
+= 6 * extra_cost
->alu
.shift
;
9006 *cost
+= 3 * extra_cost
->alu
.logical
;
9011 *cost
+= COSTS_N_INSNS (4);
9015 *cost
+= 2 * extra_cost
->alu
.shift
;
9016 *cost
+= extra_cost
->alu
.arith_shift
;
9017 *cost
+= 2 * extra_cost
->alu
.logical
;
9025 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9026 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9028 if (GET_CODE (XEXP (x
, 0)) == MULT
9029 || GET_CODE (XEXP (x
, 1)) == MULT
)
9031 rtx mul_op0
, mul_op1
, sub_op
;
9034 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9036 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9038 mul_op0
= XEXP (XEXP (x
, 0), 0);
9039 mul_op1
= XEXP (XEXP (x
, 0), 1);
9040 sub_op
= XEXP (x
, 1);
9044 mul_op0
= XEXP (XEXP (x
, 1), 0);
9045 mul_op1
= XEXP (XEXP (x
, 1), 1);
9046 sub_op
= XEXP (x
, 0);
9049 /* The first operand of the multiply may be optionally
9051 if (GET_CODE (mul_op0
) == NEG
)
9052 mul_op0
= XEXP (mul_op0
, 0);
9054 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9055 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9056 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9062 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9068 rtx shift_by_reg
= NULL
;
9072 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9073 if (shift_op
== NULL
)
9075 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9076 non_shift_op
= XEXP (x
, 0);
9079 non_shift_op
= XEXP (x
, 1);
9081 if (shift_op
!= NULL
)
9083 if (shift_by_reg
!= NULL
)
9086 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9087 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9090 *cost
+= extra_cost
->alu
.arith_shift
;
9092 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9093 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9098 && GET_CODE (XEXP (x
, 1)) == MULT
)
9102 *cost
+= extra_cost
->mult
[0].add
;
9103 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9104 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9105 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9109 if (CONST_INT_P (XEXP (x
, 0)))
9111 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9112 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9114 *cost
= COSTS_N_INSNS (insns
);
9116 *cost
+= insns
* extra_cost
->alu
.arith
;
9117 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9121 *cost
+= extra_cost
->alu
.arith
;
9126 if (GET_MODE_CLASS (mode
) == MODE_INT
9127 && GET_MODE_SIZE (mode
) < 4)
9129 rtx shift_op
, shift_reg
;
9132 /* We check both sides of the MINUS for shifter operands since,
9133 unlike PLUS, it's not commutative. */
9135 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9136 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9138 /* Slightly disparage, as we might need to widen the result. */
9141 *cost
+= extra_cost
->alu
.arith
;
9143 if (CONST_INT_P (XEXP (x
, 0)))
9145 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9154 *cost
+= COSTS_N_INSNS (1);
9156 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9158 rtx op1
= XEXP (x
, 1);
9161 *cost
+= 2 * extra_cost
->alu
.arith
;
9163 if (GET_CODE (op1
) == ZERO_EXTEND
)
9164 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9167 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9168 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9172 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9175 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9176 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9178 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9181 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9182 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9185 *cost
+= (extra_cost
->alu
.arith
9186 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9187 ? extra_cost
->alu
.arith
9188 : extra_cost
->alu
.arith_shift
));
9189 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9190 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9191 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9196 *cost
+= 2 * extra_cost
->alu
.arith
;
9202 *cost
= LIBCALL_COST (2);
9206 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9207 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9209 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9211 rtx mul_op0
, mul_op1
, add_op
;
9214 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9216 mul_op0
= XEXP (XEXP (x
, 0), 0);
9217 mul_op1
= XEXP (XEXP (x
, 0), 1);
9218 add_op
= XEXP (x
, 1);
9220 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9221 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9222 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9228 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9231 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9233 *cost
= LIBCALL_COST (2);
9237 /* Narrow modes can be synthesized in SImode, but the range
9238 of useful sub-operations is limited. Check for shift operations
9239 on one of the operands. Only left shifts can be used in the
9241 if (GET_MODE_CLASS (mode
) == MODE_INT
9242 && GET_MODE_SIZE (mode
) < 4)
9244 rtx shift_op
, shift_reg
;
9247 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9249 if (CONST_INT_P (XEXP (x
, 1)))
9251 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9252 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9254 *cost
= COSTS_N_INSNS (insns
);
9256 *cost
+= insns
* extra_cost
->alu
.arith
;
9257 /* Slightly penalize a narrow operation as the result may
9259 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9263 /* Slightly penalize a narrow operation as the result may
9267 *cost
+= extra_cost
->alu
.arith
;
9274 rtx shift_op
, shift_reg
;
9277 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9278 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9280 /* UXTA[BH] or SXTA[BH]. */
9282 *cost
+= extra_cost
->alu
.extend_arith
;
9283 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9285 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9290 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9291 if (shift_op
!= NULL
)
9296 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9297 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9300 *cost
+= extra_cost
->alu
.arith_shift
;
9302 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9303 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9306 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9308 rtx mul_op
= XEXP (x
, 0);
9310 if (TARGET_DSP_MULTIPLY
9311 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9312 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9313 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9314 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9315 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9316 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9317 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9318 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9319 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9320 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9321 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9322 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9327 *cost
+= extra_cost
->mult
[0].extend_add
;
9328 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9329 SIGN_EXTEND
, 0, speed_p
)
9330 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9331 SIGN_EXTEND
, 0, speed_p
)
9332 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9337 *cost
+= extra_cost
->mult
[0].add
;
9338 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9339 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9340 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9343 if (CONST_INT_P (XEXP (x
, 1)))
9345 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9346 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9348 *cost
= COSTS_N_INSNS (insns
);
9350 *cost
+= insns
* extra_cost
->alu
.arith
;
9351 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9355 *cost
+= extra_cost
->alu
.arith
;
9363 && GET_CODE (XEXP (x
, 0)) == MULT
9364 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9365 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9366 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9367 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9370 *cost
+= extra_cost
->mult
[1].extend_add
;
9371 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
9372 ZERO_EXTEND
, 0, speed_p
)
9373 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
9374 ZERO_EXTEND
, 0, speed_p
)
9375 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9379 *cost
+= COSTS_N_INSNS (1);
9381 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9382 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9385 *cost
+= (extra_cost
->alu
.arith
9386 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9387 ? extra_cost
->alu
.arith
9388 : extra_cost
->alu
.arith_shift
));
9390 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9392 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9397 *cost
+= 2 * extra_cost
->alu
.arith
;
9402 *cost
= LIBCALL_COST (2);
9405 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9408 *cost
+= extra_cost
->alu
.rev
;
9416 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9417 rtx op0
= XEXP (x
, 0);
9418 rtx shift_op
, shift_reg
;
9422 || (code
== IOR
&& TARGET_THUMB2
)))
9423 op0
= XEXP (op0
, 0);
9426 shift_op
= shifter_op_p (op0
, &shift_reg
);
9427 if (shift_op
!= NULL
)
9432 *cost
+= extra_cost
->alu
.log_shift_reg
;
9433 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9436 *cost
+= extra_cost
->alu
.log_shift
;
9438 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9439 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9443 if (CONST_INT_P (XEXP (x
, 1)))
9445 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9446 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9449 *cost
= COSTS_N_INSNS (insns
);
9451 *cost
+= insns
* extra_cost
->alu
.logical
;
9452 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
9457 *cost
+= extra_cost
->alu
.logical
;
9458 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
9459 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9465 rtx op0
= XEXP (x
, 0);
9466 enum rtx_code subcode
= GET_CODE (op0
);
9468 *cost
+= COSTS_N_INSNS (1);
9472 || (code
== IOR
&& TARGET_THUMB2
)))
9473 op0
= XEXP (op0
, 0);
9475 if (GET_CODE (op0
) == ZERO_EXTEND
)
9478 *cost
+= 2 * extra_cost
->alu
.logical
;
9480 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
9482 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9485 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9488 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9490 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
9492 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9497 *cost
+= 2 * extra_cost
->alu
.logical
;
9503 *cost
= LIBCALL_COST (2);
9507 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9508 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9510 rtx op0
= XEXP (x
, 0);
9512 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
9513 op0
= XEXP (op0
, 0);
9516 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9518 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
9519 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
9522 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9524 *cost
= LIBCALL_COST (2);
9530 if (TARGET_DSP_MULTIPLY
9531 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9532 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9533 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9534 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9535 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9536 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
9537 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
9538 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
9539 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9540 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9541 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9542 && (INTVAL (XEXP (XEXP (x
, 1), 1))
9547 *cost
+= extra_cost
->mult
[0].extend
;
9548 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
9549 SIGN_EXTEND
, 0, speed_p
);
9550 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
9551 SIGN_EXTEND
, 1, speed_p
);
9555 *cost
+= extra_cost
->mult
[0].simple
;
9562 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9563 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
9564 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9565 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
9568 *cost
+= extra_cost
->mult
[1].extend
;
9569 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
9570 ZERO_EXTEND
, 0, speed_p
)
9571 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9572 ZERO_EXTEND
, 0, speed_p
));
9576 *cost
= LIBCALL_COST (2);
9581 *cost
= LIBCALL_COST (2);
9585 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9586 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9588 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9591 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
9596 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9600 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9602 *cost
= LIBCALL_COST (1);
9608 if (GET_CODE (XEXP (x
, 0)) == ABS
)
9610 *cost
+= COSTS_N_INSNS (1);
9611 /* Assume the non-flag-changing variant. */
9613 *cost
+= (extra_cost
->alu
.log_shift
9614 + extra_cost
->alu
.arith_shift
);
9615 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
9619 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
9620 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
9622 *cost
+= COSTS_N_INSNS (1);
9623 /* No extra cost for MOV imm and MVN imm. */
9624 /* If the comparison op is using the flags, there's no further
9625 cost, otherwise we need to add the cost of the comparison. */
9626 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
9627 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
9628 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
9630 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
9631 *cost
+= (COSTS_N_INSNS (1)
9632 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
9634 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
9637 *cost
+= extra_cost
->alu
.arith
;
9643 *cost
+= extra_cost
->alu
.arith
;
9647 if (GET_MODE_CLASS (mode
) == MODE_INT
9648 && GET_MODE_SIZE (mode
) < 4)
9650 /* Slightly disparage, as we might need an extend operation. */
9653 *cost
+= extra_cost
->alu
.arith
;
9659 *cost
+= COSTS_N_INSNS (1);
9661 *cost
+= 2 * extra_cost
->alu
.arith
;
9666 *cost
= LIBCALL_COST (1);
9673 rtx shift_reg
= NULL
;
9675 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9679 if (shift_reg
!= NULL
)
9682 *cost
+= extra_cost
->alu
.log_shift_reg
;
9683 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9686 *cost
+= extra_cost
->alu
.log_shift
;
9687 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
9692 *cost
+= extra_cost
->alu
.logical
;
9697 *cost
+= COSTS_N_INSNS (1);
9703 *cost
+= LIBCALL_COST (1);
9708 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9710 *cost
+= COSTS_N_INSNS (3);
9713 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
9714 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
9716 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
9717 /* Assume that if one arm of the if_then_else is a register,
9718 that it will be tied with the result and eliminate the
9719 conditional insn. */
9720 if (REG_P (XEXP (x
, 1)))
9722 else if (REG_P (XEXP (x
, 2)))
9728 if (extra_cost
->alu
.non_exec_costs_exec
)
9729 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
9731 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
9734 *cost
+= op1cost
+ op2cost
;
9740 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
9744 machine_mode op0mode
;
9745 /* We'll mostly assume that the cost of a compare is the cost of the
9746 LHS. However, there are some notable exceptions. */
9748 /* Floating point compares are never done as side-effects. */
9749 op0mode
= GET_MODE (XEXP (x
, 0));
9750 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
9751 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9754 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
9756 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
9758 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
9764 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
9766 *cost
= LIBCALL_COST (2);
9770 /* DImode compares normally take two insns. */
9771 if (op0mode
== DImode
)
9773 *cost
+= COSTS_N_INSNS (1);
9775 *cost
+= 2 * extra_cost
->alu
.arith
;
9779 if (op0mode
== SImode
)
9784 if (XEXP (x
, 1) == const0_rtx
9785 && !(REG_P (XEXP (x
, 0))
9786 || (GET_CODE (XEXP (x
, 0)) == SUBREG
9787 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
9789 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
9791 /* Multiply operations that set the flags are often
9792 significantly more expensive. */
9794 && GET_CODE (XEXP (x
, 0)) == MULT
9795 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
9796 *cost
+= extra_cost
->mult
[0].flag_setting
;
9799 && GET_CODE (XEXP (x
, 0)) == PLUS
9800 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
9801 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
9803 *cost
+= extra_cost
->mult
[0].flag_setting
;
9808 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9809 if (shift_op
!= NULL
)
9811 if (shift_reg
!= NULL
)
9813 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
9816 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9819 *cost
+= extra_cost
->alu
.arith_shift
;
9820 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
9821 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
9826 *cost
+= extra_cost
->alu
.arith
;
9827 if (CONST_INT_P (XEXP (x
, 1))
9828 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
9830 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
9838 *cost
= LIBCALL_COST (2);
9861 if (outer_code
== SET
)
9863 /* Is it a store-flag operation? */
9864 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
9865 && XEXP (x
, 1) == const0_rtx
)
9867 /* Thumb also needs an IT insn. */
9868 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
9871 if (XEXP (x
, 1) == const0_rtx
)
9876 /* LSR Rd, Rn, #31. */
9878 *cost
+= extra_cost
->alu
.shift
;
9888 *cost
+= COSTS_N_INSNS (1);
9892 /* RSBS T1, Rn, Rn, LSR #31
9894 *cost
+= COSTS_N_INSNS (1);
9896 *cost
+= extra_cost
->alu
.arith_shift
;
9900 /* RSB Rd, Rn, Rn, ASR #1
9902 *cost
+= COSTS_N_INSNS (1);
9904 *cost
+= (extra_cost
->alu
.arith_shift
9905 + extra_cost
->alu
.shift
);
9911 *cost
+= COSTS_N_INSNS (1);
9913 *cost
+= extra_cost
->alu
.shift
;
9917 /* Remaining cases are either meaningless or would take
9918 three insns anyway. */
9919 *cost
= COSTS_N_INSNS (3);
9922 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9927 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
9928 if (CONST_INT_P (XEXP (x
, 1))
9929 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
9931 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9938 /* Not directly inside a set. If it involves the condition code
9939 register it must be the condition for a branch, cond_exec or
9940 I_T_E operation. Since the comparison is performed elsewhere
9941 this is just the control part which has no additional
9943 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
9944 && XEXP (x
, 1) == const0_rtx
)
9952 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9953 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9956 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9960 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9962 *cost
= LIBCALL_COST (1);
9969 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
9973 *cost
= LIBCALL_COST (1);
9977 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
9978 && MEM_P (XEXP (x
, 0)))
9981 *cost
+= COSTS_N_INSNS (1);
9986 if (GET_MODE (XEXP (x
, 0)) == SImode
)
9987 *cost
+= extra_cost
->ldst
.load
;
9989 *cost
+= extra_cost
->ldst
.load_sign_extend
;
9992 *cost
+= extra_cost
->alu
.shift
;
9997 /* Widening from less than 32-bits requires an extend operation. */
9998 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10000 /* We have SXTB/SXTH. */
10001 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10003 *cost
+= extra_cost
->alu
.extend
;
10005 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10007 /* Needs two shifts. */
10008 *cost
+= COSTS_N_INSNS (1);
10009 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10011 *cost
+= 2 * extra_cost
->alu
.shift
;
10014 /* Widening beyond 32-bits requires one more insn. */
10015 if (mode
== DImode
)
10017 *cost
+= COSTS_N_INSNS (1);
10019 *cost
+= extra_cost
->alu
.shift
;
10026 || GET_MODE (XEXP (x
, 0)) == SImode
10027 || GET_MODE (XEXP (x
, 0)) == QImode
)
10028 && MEM_P (XEXP (x
, 0)))
10030 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10032 if (mode
== DImode
)
10033 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10038 /* Widening from less than 32-bits requires an extend operation. */
10039 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10041 /* UXTB can be a shorter instruction in Thumb2, but it might
10042 be slower than the AND Rd, Rn, #255 alternative. When
10043 optimizing for speed it should never be slower to use
10044 AND, and we don't really model 16-bit vs 32-bit insns
10047 *cost
+= extra_cost
->alu
.logical
;
10049 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10051 /* We have UXTB/UXTH. */
10052 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10054 *cost
+= extra_cost
->alu
.extend
;
10056 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10058 /* Needs two shifts. It's marginally preferable to use
10059 shifts rather than two BIC instructions as the second
10060 shift may merge with a subsequent insn as a shifter
10062 *cost
= COSTS_N_INSNS (2);
10063 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10065 *cost
+= 2 * extra_cost
->alu
.shift
;
10068 /* Widening beyond 32-bits requires one more insn. */
10069 if (mode
== DImode
)
10071 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10078 /* CONST_INT has no mode, so we cannot tell for sure how many
10079 insns are really going to be needed. The best we can do is
10080 look at the value passed. If it fits in SImode, then assume
10081 that's the mode it will be used for. Otherwise assume it
10082 will be used in DImode. */
10083 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10088 /* Avoid blowing up in arm_gen_constant (). */
10089 if (!(outer_code
== PLUS
10090 || outer_code
== AND
10091 || outer_code
== IOR
10092 || outer_code
== XOR
10093 || outer_code
== MINUS
))
10097 if (mode
== SImode
)
10099 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10100 INTVAL (x
), NULL
, NULL
,
10106 *cost
+= COSTS_N_INSNS (arm_gen_constant
10107 (outer_code
, SImode
, NULL
,
10108 trunc_int_for_mode (INTVAL (x
), SImode
),
10110 + arm_gen_constant (outer_code
, SImode
, NULL
,
10111 INTVAL (x
) >> 32, NULL
,
10123 if (arm_arch_thumb2
&& !flag_pic
)
10124 *cost
+= COSTS_N_INSNS (1);
10126 *cost
+= extra_cost
->ldst
.load
;
10129 *cost
+= COSTS_N_INSNS (1);
10133 *cost
+= COSTS_N_INSNS (1);
10135 *cost
+= extra_cost
->alu
.arith
;
10141 *cost
= COSTS_N_INSNS (4);
10146 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10147 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10149 if (vfp3_const_double_rtx (x
))
10152 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10158 if (mode
== DFmode
)
10159 *cost
+= extra_cost
->ldst
.loadd
;
10161 *cost
+= extra_cost
->ldst
.loadf
;
10164 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10168 *cost
= COSTS_N_INSNS (4);
10174 && TARGET_HARD_FLOAT
10175 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10176 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10177 *cost
= COSTS_N_INSNS (1);
10179 *cost
= COSTS_N_INSNS (4);
10184 /* When optimizing for size, we prefer constant pool entries to
10185 MOVW/MOVT pairs, so bump the cost of these slightly. */
10192 *cost
+= extra_cost
->alu
.clz
;
10196 if (XEXP (x
, 1) == const0_rtx
)
10199 *cost
+= extra_cost
->alu
.log_shift
;
10200 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10203 /* Fall through. */
10207 *cost
+= COSTS_N_INSNS (1);
10211 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10212 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10213 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10214 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10215 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10216 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10217 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10218 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10222 *cost
+= extra_cost
->mult
[1].extend
;
10223 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10224 ZERO_EXTEND
, 0, speed_p
)
10225 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10226 ZERO_EXTEND
, 0, speed_p
));
10229 *cost
= LIBCALL_COST (1);
10232 case UNSPEC_VOLATILE
:
10234 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10237 /* Reading the PC is like reading any other register. Writing it
10238 is more expensive, but we take that into account elsewhere. */
10243 /* TODO: Simple zero_extract of bottom bits using AND. */
10244 /* Fall through. */
10248 && CONST_INT_P (XEXP (x
, 1))
10249 && CONST_INT_P (XEXP (x
, 2)))
10252 *cost
+= extra_cost
->alu
.bfx
;
10253 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10256 /* Without UBFX/SBFX, need to resort to shift operations. */
10257 *cost
+= COSTS_N_INSNS (1);
10259 *cost
+= 2 * extra_cost
->alu
.shift
;
10260 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10264 if (TARGET_HARD_FLOAT
)
10267 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10268 if (!TARGET_FPU_ARMV8
10269 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10271 /* Pre v8, widening HF->DF is a two-step process, first
10272 widening to SFmode. */
10273 *cost
+= COSTS_N_INSNS (1);
10275 *cost
+= extra_cost
->fp
[0].widen
;
10277 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10281 *cost
= LIBCALL_COST (1);
10284 case FLOAT_TRUNCATE
:
10285 if (TARGET_HARD_FLOAT
)
10288 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10289 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10291 /* Vector modes? */
10293 *cost
= LIBCALL_COST (1);
10297 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10299 rtx op0
= XEXP (x
, 0);
10300 rtx op1
= XEXP (x
, 1);
10301 rtx op2
= XEXP (x
, 2);
10304 /* vfms or vfnma. */
10305 if (GET_CODE (op0
) == NEG
)
10306 op0
= XEXP (op0
, 0);
10308 /* vfnms or vfnma. */
10309 if (GET_CODE (op2
) == NEG
)
10310 op2
= XEXP (op2
, 0);
10312 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
10313 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
10314 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
10317 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10322 *cost
= LIBCALL_COST (3);
10327 if (TARGET_HARD_FLOAT
)
10329 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10330 a vcvt fixed-point conversion. */
10331 if (code
== FIX
&& mode
== SImode
10332 && GET_CODE (XEXP (x
, 0)) == FIX
10333 && GET_MODE (XEXP (x
, 0)) == SFmode
10334 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10335 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10339 *cost
+= extra_cost
->fp
[0].toint
;
10341 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10346 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10348 mode
= GET_MODE (XEXP (x
, 0));
10350 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
10351 /* Strip of the 'cost' of rounding towards zero. */
10352 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10353 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
10356 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10357 /* ??? Increase the cost to deal with transferring from
10358 FP -> CORE registers? */
10361 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10362 && TARGET_FPU_ARMV8
)
10365 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10368 /* Vector costs? */
10370 *cost
= LIBCALL_COST (1);
10374 case UNSIGNED_FLOAT
:
10375 if (TARGET_HARD_FLOAT
)
10377 /* ??? Increase the cost to deal with transferring from CORE
10378 -> FP registers? */
10380 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10383 *cost
= LIBCALL_COST (1);
10391 /* Just a guess. Guess number of instructions in the asm
10392 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10393 though (see PR60663). */
10394 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10395 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10397 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10401 if (mode
!= VOIDmode
)
10402 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10404 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10409 #undef HANDLE_NARROW_SHIFT_ARITH
10411 /* RTX costs entry point. */
10414 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
10415 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
10418 int code
= GET_CODE (x
);
10419 gcc_assert (current_tune
->insn_extra_cost
);
10421 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
10422 (enum rtx_code
) outer_code
,
10423 current_tune
->insn_extra_cost
,
10426 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10428 print_rtl_single (dump_file
, x
);
10429 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10430 *total
, result
? "final" : "partial");
10435 /* All address computations that can be done are free, but rtx cost returns
10436 the same for practically all of them. So we weight the different types
10437 of address here in the order (most pref first):
10438 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10440 arm_arm_address_cost (rtx x
)
10442 enum rtx_code c
= GET_CODE (x
);
10444 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
10446 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
10451 if (CONST_INT_P (XEXP (x
, 1)))
10454 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
10464 arm_thumb_address_cost (rtx x
)
10466 enum rtx_code c
= GET_CODE (x
);
10471 && REG_P (XEXP (x
, 0))
10472 && CONST_INT_P (XEXP (x
, 1)))
10479 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
10480 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
10482 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
10485 /* Adjust cost hook for XScale. */
10487 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10490 /* Some true dependencies can have a higher cost depending
10491 on precisely how certain input operands are used. */
10493 && recog_memoized (insn
) >= 0
10494 && recog_memoized (dep
) >= 0)
10496 int shift_opnum
= get_attr_shift (insn
);
10497 enum attr_type attr_type
= get_attr_type (dep
);
10499 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10500 operand for INSN. If we have a shifted input operand and the
10501 instruction we depend on is another ALU instruction, then we may
10502 have to account for an additional stall. */
10503 if (shift_opnum
!= 0
10504 && (attr_type
== TYPE_ALU_SHIFT_IMM
10505 || attr_type
== TYPE_ALUS_SHIFT_IMM
10506 || attr_type
== TYPE_LOGIC_SHIFT_IMM
10507 || attr_type
== TYPE_LOGICS_SHIFT_IMM
10508 || attr_type
== TYPE_ALU_SHIFT_REG
10509 || attr_type
== TYPE_ALUS_SHIFT_REG
10510 || attr_type
== TYPE_LOGIC_SHIFT_REG
10511 || attr_type
== TYPE_LOGICS_SHIFT_REG
10512 || attr_type
== TYPE_MOV_SHIFT
10513 || attr_type
== TYPE_MVN_SHIFT
10514 || attr_type
== TYPE_MOV_SHIFT_REG
10515 || attr_type
== TYPE_MVN_SHIFT_REG
))
10517 rtx shifted_operand
;
10520 /* Get the shifted operand. */
10521 extract_insn (insn
);
10522 shifted_operand
= recog_data
.operand
[shift_opnum
];
10524 /* Iterate over all the operands in DEP. If we write an operand
10525 that overlaps with SHIFTED_OPERAND, then we have increase the
10526 cost of this dependency. */
10527 extract_insn (dep
);
10528 preprocess_constraints (dep
);
10529 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
10531 /* We can ignore strict inputs. */
10532 if (recog_data
.operand_type
[opno
] == OP_IN
)
10535 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
10547 /* Adjust cost hook for Cortex A9. */
10549 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10559 case REG_DEP_OUTPUT
:
10560 if (recog_memoized (insn
) >= 0
10561 && recog_memoized (dep
) >= 0)
10563 if (GET_CODE (PATTERN (insn
)) == SET
)
10566 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
10568 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
10570 enum attr_type attr_type_insn
= get_attr_type (insn
);
10571 enum attr_type attr_type_dep
= get_attr_type (dep
);
10573 /* By default all dependencies of the form
10576 have an extra latency of 1 cycle because
10577 of the input and output dependency in this
10578 case. However this gets modeled as an true
10579 dependency and hence all these checks. */
10580 if (REG_P (SET_DEST (PATTERN (insn
)))
10581 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
10583 /* FMACS is a special case where the dependent
10584 instruction can be issued 3 cycles before
10585 the normal latency in case of an output
10587 if ((attr_type_insn
== TYPE_FMACS
10588 || attr_type_insn
== TYPE_FMACD
)
10589 && (attr_type_dep
== TYPE_FMACS
10590 || attr_type_dep
== TYPE_FMACD
))
10592 if (dep_type
== REG_DEP_OUTPUT
)
10593 *cost
= insn_default_latency (dep
) - 3;
10595 *cost
= insn_default_latency (dep
);
10600 if (dep_type
== REG_DEP_OUTPUT
)
10601 *cost
= insn_default_latency (dep
) + 1;
10603 *cost
= insn_default_latency (dep
);
10613 gcc_unreachable ();
10619 /* Adjust cost hook for FA726TE. */
10621 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10624 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
10625 have penalty of 3. */
10626 if (dep_type
== REG_DEP_TRUE
10627 && recog_memoized (insn
) >= 0
10628 && recog_memoized (dep
) >= 0
10629 && get_attr_conds (dep
) == CONDS_SET
)
10631 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
10632 if (get_attr_conds (insn
) == CONDS_USE
10633 && get_attr_type (insn
) != TYPE_BRANCH
)
10639 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
10640 || get_attr_conds (insn
) == CONDS_USE
)
10650 /* Implement TARGET_REGISTER_MOVE_COST.
10652 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
10653 it is typically more expensive than a single memory access. We set
10654 the cost to less than two memory accesses so that floating
10655 point to integer conversion does not go through memory. */
10658 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
10659 reg_class_t from
, reg_class_t to
)
10663 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
10664 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
10666 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
10667 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
10669 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
10676 if (from
== HI_REGS
|| to
== HI_REGS
)
10683 /* Implement TARGET_MEMORY_MOVE_COST. */
10686 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
10687 bool in ATTRIBUTE_UNUSED
)
10693 if (GET_MODE_SIZE (mode
) < 4)
10696 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
10700 /* Vectorizer cost model implementation. */
10702 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10704 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
10706 int misalign ATTRIBUTE_UNUSED
)
10710 switch (type_of_cost
)
10713 return current_tune
->vec_costs
->scalar_stmt_cost
;
10716 return current_tune
->vec_costs
->scalar_load_cost
;
10719 return current_tune
->vec_costs
->scalar_store_cost
;
10722 return current_tune
->vec_costs
->vec_stmt_cost
;
10725 return current_tune
->vec_costs
->vec_align_load_cost
;
10728 return current_tune
->vec_costs
->vec_store_cost
;
10730 case vec_to_scalar
:
10731 return current_tune
->vec_costs
->vec_to_scalar_cost
;
10733 case scalar_to_vec
:
10734 return current_tune
->vec_costs
->scalar_to_vec_cost
;
10736 case unaligned_load
:
10737 return current_tune
->vec_costs
->vec_unalign_load_cost
;
10739 case unaligned_store
:
10740 return current_tune
->vec_costs
->vec_unalign_store_cost
;
10742 case cond_branch_taken
:
10743 return current_tune
->vec_costs
->cond_taken_branch_cost
;
10745 case cond_branch_not_taken
:
10746 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
10749 case vec_promote_demote
:
10750 return current_tune
->vec_costs
->vec_stmt_cost
;
10752 case vec_construct
:
10753 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
10754 return elements
/ 2 + 1;
10757 gcc_unreachable ();
10761 /* Implement targetm.vectorize.add_stmt_cost. */
10764 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
10765 struct _stmt_vec_info
*stmt_info
, int misalign
,
10766 enum vect_cost_model_location where
)
10768 unsigned *cost
= (unsigned *) data
;
10769 unsigned retval
= 0;
10771 if (flag_vect_cost_model
)
10773 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
10774 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
10776 /* Statements in an inner loop relative to the loop being
10777 vectorized are weighted more heavily. The value here is
10778 arbitrary and could potentially be improved with analysis. */
10779 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
10780 count
*= 50; /* FIXME. */
10782 retval
= (unsigned) (count
* stmt_cost
);
10783 cost
[where
] += retval
;
10789 /* Return true if and only if this insn can dual-issue only as older. */
10791 cortexa7_older_only (rtx_insn
*insn
)
10793 if (recog_memoized (insn
) < 0)
10796 switch (get_attr_type (insn
))
10798 case TYPE_ALU_DSP_REG
:
10799 case TYPE_ALU_SREG
:
10800 case TYPE_ALUS_SREG
:
10801 case TYPE_LOGIC_REG
:
10802 case TYPE_LOGICS_REG
:
10804 case TYPE_ADCS_REG
:
10809 case TYPE_SHIFT_IMM
:
10810 case TYPE_SHIFT_REG
:
10811 case TYPE_LOAD_BYTE
:
10814 case TYPE_FFARITHS
:
10816 case TYPE_FFARITHD
:
10834 case TYPE_F_STORES
:
10841 /* Return true if and only if this insn can dual-issue as younger. */
10843 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
10845 if (recog_memoized (insn
) < 0)
10848 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
10852 switch (get_attr_type (insn
))
10855 case TYPE_ALUS_IMM
:
10856 case TYPE_LOGIC_IMM
:
10857 case TYPE_LOGICS_IMM
:
10862 case TYPE_MOV_SHIFT
:
10863 case TYPE_MOV_SHIFT_REG
:
10873 /* Look for an instruction that can dual issue only as an older
10874 instruction, and move it in front of any instructions that can
10875 dual-issue as younger, while preserving the relative order of all
10876 other instructions in the ready list. This is a hueuristic to help
10877 dual-issue in later cycles, by postponing issue of more flexible
10878 instructions. This heuristic may affect dual issue opportunities
10879 in the current cycle. */
10881 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
10882 int *n_readyp
, int clock
)
10885 int first_older_only
= -1, first_younger
= -1;
10889 ";; sched_reorder for cycle %d with %d insns in ready list\n",
10893 /* Traverse the ready list from the head (the instruction to issue
10894 first), and looking for the first instruction that can issue as
10895 younger and the first instruction that can dual-issue only as
10897 for (i
= *n_readyp
- 1; i
>= 0; i
--)
10899 rtx_insn
*insn
= ready
[i
];
10900 if (cortexa7_older_only (insn
))
10902 first_older_only
= i
;
10904 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
10907 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
10911 /* Nothing to reorder because either no younger insn found or insn
10912 that can dual-issue only as older appears before any insn that
10913 can dual-issue as younger. */
10914 if (first_younger
== -1)
10917 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
10921 /* Nothing to reorder because no older-only insn in the ready list. */
10922 if (first_older_only
== -1)
10925 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
10929 /* Move first_older_only insn before first_younger. */
10931 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
10932 INSN_UID(ready
[first_older_only
]),
10933 INSN_UID(ready
[first_younger
]));
10934 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
10935 for (i
= first_older_only
; i
< first_younger
; i
++)
10937 ready
[i
] = ready
[i
+1];
10940 ready
[i
] = first_older_only_insn
;
10944 /* Implement TARGET_SCHED_REORDER. */
10946 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
10951 case TARGET_CPU_cortexa7
:
10952 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
10955 /* Do nothing for other cores. */
10959 return arm_issue_rate ();
10962 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
10963 It corrects the value of COST based on the relationship between
10964 INSN and DEP through the dependence LINK. It returns the new
10965 value. There is a per-core adjust_cost hook to adjust scheduler costs
10966 and the per-core hook can choose to completely override the generic
10967 adjust_cost function. Only put bits of code into arm_adjust_cost that
10968 are common across all cores. */
10970 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
10975 /* When generating Thumb-1 code, we want to place flag-setting operations
10976 close to a conditional branch which depends on them, so that we can
10977 omit the comparison. */
10980 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
10981 && recog_memoized (dep
) >= 0
10982 && get_attr_conds (dep
) == CONDS_SET
)
10985 if (current_tune
->sched_adjust_cost
!= NULL
)
10987 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
10991 /* XXX Is this strictly true? */
10992 if (dep_type
== REG_DEP_ANTI
10993 || dep_type
== REG_DEP_OUTPUT
)
10996 /* Call insns don't incur a stall, even if they follow a load. */
11001 if ((i_pat
= single_set (insn
)) != NULL
11002 && MEM_P (SET_SRC (i_pat
))
11003 && (d_pat
= single_set (dep
)) != NULL
11004 && MEM_P (SET_DEST (d_pat
)))
11006 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11007 /* This is a load after a store, there is no conflict if the load reads
11008 from a cached area. Assume that loads from the stack, and from the
11009 constant pool are cached, and that others will miss. This is a
11012 if ((GET_CODE (src_mem
) == SYMBOL_REF
11013 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11014 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11015 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11016 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11024 arm_max_conditional_execute (void)
11026 return max_insns_skipped
;
11030 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11033 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11035 return (optimize
> 0) ? 2 : 0;
11039 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11041 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11044 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11045 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11046 sequences of non-executed instructions in IT blocks probably take the same
11047 amount of time as executed instructions (and the IT instruction itself takes
11048 space in icache). This function was experimentally determined to give good
11049 results on a popular embedded benchmark. */
11052 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11054 return (TARGET_32BIT
&& speed_p
) ? 1
11055 : arm_default_branch_cost (speed_p
, predictable_p
);
11059 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
11061 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11064 static bool fp_consts_inited
= false;
11066 static REAL_VALUE_TYPE value_fp0
;
11069 init_fp_table (void)
11073 r
= REAL_VALUE_ATOF ("0", DFmode
);
11075 fp_consts_inited
= true;
11078 /* Return TRUE if rtx X is a valid immediate FP constant. */
11080 arm_const_double_rtx (rtx x
)
11082 const REAL_VALUE_TYPE
*r
;
11084 if (!fp_consts_inited
)
11087 r
= CONST_DOUBLE_REAL_VALUE (x
);
11088 if (REAL_VALUE_MINUS_ZERO (*r
))
11091 if (real_equal (r
, &value_fp0
))
11097 /* VFPv3 has a fairly wide range of representable immediates, formed from
11098 "quarter-precision" floating-point values. These can be evaluated using this
11099 formula (with ^ for exponentiation):
11103 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11104 16 <= n <= 31 and 0 <= r <= 7.
11106 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11108 - A (most-significant) is the sign bit.
11109 - BCD are the exponent (encoded as r XOR 3).
11110 - EFGH are the mantissa (encoded as n - 16).
11113 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11114 fconst[sd] instruction, or -1 if X isn't suitable. */
11116 vfp3_const_double_index (rtx x
)
11118 REAL_VALUE_TYPE r
, m
;
11119 int sign
, exponent
;
11120 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11121 unsigned HOST_WIDE_INT mask
;
11122 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11125 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11128 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11130 /* We can't represent these things, so detect them first. */
11131 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11134 /* Extract sign, exponent and mantissa. */
11135 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11136 r
= real_value_abs (&r
);
11137 exponent
= REAL_EXP (&r
);
11138 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11139 highest (sign) bit, with a fixed binary point at bit point_pos.
11140 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11141 bits for the mantissa, this may fail (low bits would be lost). */
11142 real_ldexp (&m
, &r
, point_pos
- exponent
);
11143 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11144 mantissa
= w
.elt (0);
11145 mant_hi
= w
.elt (1);
11147 /* If there are bits set in the low part of the mantissa, we can't
11148 represent this value. */
11152 /* Now make it so that mantissa contains the most-significant bits, and move
11153 the point_pos to indicate that the least-significant bits have been
11155 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11156 mantissa
= mant_hi
;
11158 /* We can permit four significant bits of mantissa only, plus a high bit
11159 which is always 1. */
11160 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11161 if ((mantissa
& mask
) != 0)
11164 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11165 mantissa
>>= point_pos
- 5;
11167 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11168 floating-point immediate zero with Neon using an integer-zero load, but
11169 that case is handled elsewhere.) */
11173 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11175 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11176 normalized significands are in the range [1, 2). (Our mantissa is shifted
11177 left 4 places at this point relative to normalized IEEE754 values). GCC
11178 internally uses [0.5, 1) (see real.c), so the exponent returned from
11179 REAL_EXP must be altered. */
11180 exponent
= 5 - exponent
;
11182 if (exponent
< 0 || exponent
> 7)
11185 /* Sign, mantissa and exponent are now in the correct form to plug into the
11186 formula described in the comment above. */
11187 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11190 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11192 vfp3_const_double_rtx (rtx x
)
11197 return vfp3_const_double_index (x
) != -1;
11200 /* Recognize immediates which can be used in various Neon instructions. Legal
11201 immediates are described by the following table (for VMVN variants, the
11202 bitwise inverse of the constant shown is recognized. In either case, VMOV
11203 is output and the correct instruction to use for a given constant is chosen
11204 by the assembler). The constant shown is replicated across all elements of
11205 the destination vector.
11207 insn elems variant constant (binary)
11208 ---- ----- ------- -----------------
11209 vmov i32 0 00000000 00000000 00000000 abcdefgh
11210 vmov i32 1 00000000 00000000 abcdefgh 00000000
11211 vmov i32 2 00000000 abcdefgh 00000000 00000000
11212 vmov i32 3 abcdefgh 00000000 00000000 00000000
11213 vmov i16 4 00000000 abcdefgh
11214 vmov i16 5 abcdefgh 00000000
11215 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11216 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11217 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11218 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11219 vmvn i16 10 00000000 abcdefgh
11220 vmvn i16 11 abcdefgh 00000000
11221 vmov i32 12 00000000 00000000 abcdefgh 11111111
11222 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11223 vmov i32 14 00000000 abcdefgh 11111111 11111111
11224 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11225 vmov i8 16 abcdefgh
11226 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11227 eeeeeeee ffffffff gggggggg hhhhhhhh
11228 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11229 vmov f32 19 00000000 00000000 00000000 00000000
11231 For case 18, B = !b. Representable values are exactly those accepted by
11232 vfp3_const_double_index, but are output as floating-point numbers rather
11235 For case 19, we will change it to vmov.i32 when assembling.
11237 Variants 0-5 (inclusive) may also be used as immediates for the second
11238 operand of VORR/VBIC instructions.
11240 The INVERSE argument causes the bitwise inverse of the given operand to be
11241 recognized instead (used for recognizing legal immediates for the VAND/VORN
11242 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11243 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11244 output, rather than the real insns vbic/vorr).
11246 INVERSE makes no difference to the recognition of float vectors.
11248 The return value is the variant of immediate as shown in the above table, or
11249 -1 if the given value doesn't match any of the listed patterns.
11252 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11253 rtx
*modconst
, int *elementwidth
)
11255 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11257 for (i = 0; i < idx; i += (STRIDE)) \
11262 immtype = (CLASS); \
11263 elsize = (ELSIZE); \
11267 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11268 unsigned int innersize
;
11269 unsigned char bytes
[16];
11270 int immtype
= -1, matches
;
11271 unsigned int invmask
= inverse
? 0xff : 0;
11272 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11275 n_elts
= CONST_VECTOR_NUNITS (op
);
11279 if (mode
== VOIDmode
)
11283 innersize
= GET_MODE_UNIT_SIZE (mode
);
11285 /* Vectors of float constants. */
11286 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11288 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11290 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11293 /* FP16 vectors cannot be represented. */
11294 if (GET_MODE_INNER (mode
) == HFmode
)
11297 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11298 are distinct in this context. */
11299 if (!const_vec_duplicate_p (op
))
11303 *modconst
= CONST_VECTOR_ELT (op
, 0);
11308 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11314 /* Splat vector constant out into a byte vector. */
11315 for (i
= 0; i
< n_elts
; i
++)
11317 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11318 unsigned HOST_WIDE_INT elpart
;
11320 gcc_assert (CONST_INT_P (el
));
11321 elpart
= INTVAL (el
);
11323 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11325 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11326 elpart
>>= BITS_PER_UNIT
;
11330 /* Sanity check. */
11331 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11335 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11336 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11338 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11339 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11341 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11342 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11344 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11345 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11347 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11349 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11351 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11352 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11354 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11355 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11357 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11358 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11360 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11361 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11363 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11365 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11367 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11368 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11370 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11371 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11373 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11374 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11376 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11377 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11379 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11381 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11382 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11390 *elementwidth
= elsize
;
11394 unsigned HOST_WIDE_INT imm
= 0;
11396 /* Un-invert bytes of recognized vector, if necessary. */
11398 for (i
= 0; i
< idx
; i
++)
11399 bytes
[i
] ^= invmask
;
11403 /* FIXME: Broken on 32-bit H_W_I hosts. */
11404 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11406 for (i
= 0; i
< 8; i
++)
11407 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11408 << (i
* BITS_PER_UNIT
);
11410 *modconst
= GEN_INT (imm
);
11414 unsigned HOST_WIDE_INT imm
= 0;
11416 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11417 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11419 *modconst
= GEN_INT (imm
);
11427 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11428 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11429 float elements), and a modified constant (whatever should be output for a
11430 VMOV) in *MODCONST. */
11433 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
11434 rtx
*modconst
, int *elementwidth
)
11438 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
11444 *modconst
= tmpconst
;
11447 *elementwidth
= tmpwidth
;
11452 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11453 the immediate is valid, write a constant suitable for using as an operand
11454 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11455 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11458 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
11459 rtx
*modconst
, int *elementwidth
)
11463 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
11465 if (retval
< 0 || retval
> 5)
11469 *modconst
= tmpconst
;
11472 *elementwidth
= tmpwidth
;
11477 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11478 the immediate is valid, write a constant suitable for using as an operand
11479 to VSHR/VSHL to *MODCONST and the corresponding element width to
11480 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
11481 because they have different limitations. */
11484 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
11485 rtx
*modconst
, int *elementwidth
,
11488 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11489 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
11490 unsigned HOST_WIDE_INT last_elt
= 0;
11491 unsigned HOST_WIDE_INT maxshift
;
11493 /* Split vector constant out into a byte vector. */
11494 for (i
= 0; i
< n_elts
; i
++)
11496 rtx el
= CONST_VECTOR_ELT (op
, i
);
11497 unsigned HOST_WIDE_INT elpart
;
11499 if (CONST_INT_P (el
))
11500 elpart
= INTVAL (el
);
11501 else if (CONST_DOUBLE_P (el
))
11504 gcc_unreachable ();
11506 if (i
!= 0 && elpart
!= last_elt
)
11512 /* Shift less than element size. */
11513 maxshift
= innersize
* 8;
11517 /* Left shift immediate value can be from 0 to <size>-1. */
11518 if (last_elt
>= maxshift
)
11523 /* Right shift immediate value can be from 1 to <size>. */
11524 if (last_elt
== 0 || last_elt
> maxshift
)
11529 *elementwidth
= innersize
* 8;
11532 *modconst
= CONST_VECTOR_ELT (op
, 0);
11537 /* Return a string suitable for output of Neon immediate logic operation
11541 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
11542 int inverse
, int quad
)
11544 int width
, is_valid
;
11545 static char templ
[40];
11547 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
11549 gcc_assert (is_valid
!= 0);
11552 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
11554 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
11559 /* Return a string suitable for output of Neon immediate shift operation
11560 (VSHR or VSHL) MNEM. */
11563 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
11564 machine_mode mode
, int quad
,
11567 int width
, is_valid
;
11568 static char templ
[40];
11570 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
11571 gcc_assert (is_valid
!= 0);
11574 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
11576 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
11581 /* Output a sequence of pairwise operations to implement a reduction.
11582 NOTE: We do "too much work" here, because pairwise operations work on two
11583 registers-worth of operands in one go. Unfortunately we can't exploit those
11584 extra calculations to do the full operation in fewer steps, I don't think.
11585 Although all vector elements of the result but the first are ignored, we
11586 actually calculate the same result in each of the elements. An alternative
11587 such as initially loading a vector with zero to use as each of the second
11588 operands would use up an additional register and take an extra instruction,
11589 for no particular gain. */
11592 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
11593 rtx (*reduc
) (rtx
, rtx
, rtx
))
11595 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
11598 for (i
= parts
/ 2; i
>= 1; i
/= 2)
11600 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
11601 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
11606 /* If VALS is a vector constant that can be loaded into a register
11607 using VDUP, generate instructions to do so and return an RTX to
11608 assign to the register. Otherwise return NULL_RTX. */
11611 neon_vdup_constant (rtx vals
)
11613 machine_mode mode
= GET_MODE (vals
);
11614 machine_mode inner_mode
= GET_MODE_INNER (mode
);
11617 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
11620 if (!const_vec_duplicate_p (vals
, &x
))
11621 /* The elements are not all the same. We could handle repeating
11622 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
11623 {0, C, 0, C, 0, C, 0, C} which can be loaded using
11627 /* We can load this constant by using VDUP and a constant in a
11628 single ARM register. This will be cheaper than a vector
11631 x
= copy_to_mode_reg (inner_mode
, x
);
11632 return gen_rtx_VEC_DUPLICATE (mode
, x
);
11635 /* Generate code to load VALS, which is a PARALLEL containing only
11636 constants (for vec_init) or CONST_VECTOR, efficiently into a
11637 register. Returns an RTX to copy into the register, or NULL_RTX
11638 for a PARALLEL that can not be converted into a CONST_VECTOR. */
11641 neon_make_constant (rtx vals
)
11643 machine_mode mode
= GET_MODE (vals
);
11645 rtx const_vec
= NULL_RTX
;
11646 int n_elts
= GET_MODE_NUNITS (mode
);
11650 if (GET_CODE (vals
) == CONST_VECTOR
)
11652 else if (GET_CODE (vals
) == PARALLEL
)
11654 /* A CONST_VECTOR must contain only CONST_INTs and
11655 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11656 Only store valid constants in a CONST_VECTOR. */
11657 for (i
= 0; i
< n_elts
; ++i
)
11659 rtx x
= XVECEXP (vals
, 0, i
);
11660 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
11663 if (n_const
== n_elts
)
11664 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
11667 gcc_unreachable ();
11669 if (const_vec
!= NULL
11670 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
11671 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
11673 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
11674 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
11675 pipeline cycle; creating the constant takes one or two ARM
11676 pipeline cycles. */
11678 else if (const_vec
!= NULL_RTX
)
11679 /* Load from constant pool. On Cortex-A8 this takes two cycles
11680 (for either double or quad vectors). We can not take advantage
11681 of single-cycle VLD1 because we need a PC-relative addressing
11685 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11686 We can not construct an initializer. */
11690 /* Initialize vector TARGET to VALS. */
11693 neon_expand_vector_init (rtx target
, rtx vals
)
11695 machine_mode mode
= GET_MODE (target
);
11696 machine_mode inner_mode
= GET_MODE_INNER (mode
);
11697 int n_elts
= GET_MODE_NUNITS (mode
);
11698 int n_var
= 0, one_var
= -1;
11699 bool all_same
= true;
11703 for (i
= 0; i
< n_elts
; ++i
)
11705 x
= XVECEXP (vals
, 0, i
);
11706 if (!CONSTANT_P (x
))
11707 ++n_var
, one_var
= i
;
11709 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
11715 rtx constant
= neon_make_constant (vals
);
11716 if (constant
!= NULL_RTX
)
11718 emit_move_insn (target
, constant
);
11723 /* Splat a single non-constant element if we can. */
11724 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
11726 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
11727 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
11731 /* One field is non-constant. Load constant then overwrite varying
11732 field. This is more efficient than using the stack. */
11735 rtx copy
= copy_rtx (vals
);
11736 rtx index
= GEN_INT (one_var
);
11738 /* Load constant part of vector, substitute neighboring value for
11739 varying element. */
11740 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
11741 neon_expand_vector_init (target
, copy
);
11743 /* Insert variable. */
11744 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
11748 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
11751 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
11754 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
11757 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
11760 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
11763 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
11766 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
11769 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
11772 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
11775 gcc_unreachable ();
11780 /* Construct the vector in memory one field at a time
11781 and load the whole vector. */
11782 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
11783 for (i
= 0; i
< n_elts
; i
++)
11784 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
11785 i
* GET_MODE_SIZE (inner_mode
)),
11786 XVECEXP (vals
, 0, i
));
11787 emit_move_insn (target
, mem
);
11790 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
11791 ERR if it doesn't. EXP indicates the source location, which includes the
11792 inlining history for intrinsics. */
11795 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
11796 const_tree exp
, const char *desc
)
11798 HOST_WIDE_INT lane
;
11800 gcc_assert (CONST_INT_P (operand
));
11802 lane
= INTVAL (operand
);
11804 if (lane
< low
|| lane
>= high
)
11807 error ("%K%s %wd out of range %wd - %wd",
11808 exp
, desc
, lane
, low
, high
- 1);
11810 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
11814 /* Bounds-check lanes. */
11817 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
11820 bounds_check (operand
, low
, high
, exp
, "lane");
11823 /* Bounds-check constants. */
11826 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
11828 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
11832 neon_element_bits (machine_mode mode
)
11834 return GET_MODE_UNIT_BITSIZE (mode
);
11838 /* Predicates for `match_operand' and `match_operator'. */
11840 /* Return TRUE if OP is a valid coprocessor memory address pattern.
11841 WB is true if full writeback address modes are allowed and is false
11842 if limited writeback address modes (POST_INC and PRE_DEC) are
11846 arm_coproc_mem_operand (rtx op
, bool wb
)
11850 /* Reject eliminable registers. */
11851 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
11852 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
11853 || reg_mentioned_p (arg_pointer_rtx
, op
)
11854 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
11855 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
11856 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
11857 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
11860 /* Constants are converted into offsets from labels. */
11864 ind
= XEXP (op
, 0);
11866 if (reload_completed
11867 && (GET_CODE (ind
) == LABEL_REF
11868 || (GET_CODE (ind
) == CONST
11869 && GET_CODE (XEXP (ind
, 0)) == PLUS
11870 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
11871 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
11874 /* Match: (mem (reg)). */
11876 return arm_address_register_rtx_p (ind
, 0);
11878 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
11879 acceptable in any case (subject to verification by
11880 arm_address_register_rtx_p). We need WB to be true to accept
11881 PRE_INC and POST_DEC. */
11882 if (GET_CODE (ind
) == POST_INC
11883 || GET_CODE (ind
) == PRE_DEC
11885 && (GET_CODE (ind
) == PRE_INC
11886 || GET_CODE (ind
) == POST_DEC
)))
11887 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
11890 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
11891 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
11892 && GET_CODE (XEXP (ind
, 1)) == PLUS
11893 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
11894 ind
= XEXP (ind
, 1);
11899 if (GET_CODE (ind
) == PLUS
11900 && REG_P (XEXP (ind
, 0))
11901 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
11902 && CONST_INT_P (XEXP (ind
, 1))
11903 && INTVAL (XEXP (ind
, 1)) > -1024
11904 && INTVAL (XEXP (ind
, 1)) < 1024
11905 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
11911 /* Return TRUE if OP is a memory operand which we can load or store a vector
11912 to/from. TYPE is one of the following values:
11913 0 - Vector load/stor (vldr)
11914 1 - Core registers (ldm)
11915 2 - Element/structure loads (vld1)
11918 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
11922 /* Reject eliminable registers. */
11923 if (strict
&& ! (reload_in_progress
|| reload_completed
)
11924 && (reg_mentioned_p (frame_pointer_rtx
, op
)
11925 || reg_mentioned_p (arg_pointer_rtx
, op
)
11926 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
11927 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
11928 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
11929 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
11932 /* Constants are converted into offsets from labels. */
11936 ind
= XEXP (op
, 0);
11938 if (reload_completed
11939 && (GET_CODE (ind
) == LABEL_REF
11940 || (GET_CODE (ind
) == CONST
11941 && GET_CODE (XEXP (ind
, 0)) == PLUS
11942 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
11943 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
11946 /* Match: (mem (reg)). */
11948 return arm_address_register_rtx_p (ind
, 0);
11950 /* Allow post-increment with Neon registers. */
11951 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
11952 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
11953 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
11955 /* Allow post-increment by register for VLDn */
11956 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
11957 && GET_CODE (XEXP (ind
, 1)) == PLUS
11958 && REG_P (XEXP (XEXP (ind
, 1), 1)))
11965 && GET_CODE (ind
) == PLUS
11966 && REG_P (XEXP (ind
, 0))
11967 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
11968 && CONST_INT_P (XEXP (ind
, 1))
11969 && INTVAL (XEXP (ind
, 1)) > -1024
11970 /* For quad modes, we restrict the constant offset to be slightly less
11971 than what the instruction format permits. We have no such constraint
11972 on double mode offsets. (This must match arm_legitimate_index_p.) */
11973 && (INTVAL (XEXP (ind
, 1))
11974 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
11975 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
11981 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
11984 neon_struct_mem_operand (rtx op
)
11988 /* Reject eliminable registers. */
11989 if (! (reload_in_progress
|| reload_completed
)
11990 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
11991 || reg_mentioned_p (arg_pointer_rtx
, op
)
11992 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
11993 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
11994 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
11995 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
11998 /* Constants are converted into offsets from labels. */
12002 ind
= XEXP (op
, 0);
12004 if (reload_completed
12005 && (GET_CODE (ind
) == LABEL_REF
12006 || (GET_CODE (ind
) == CONST
12007 && GET_CODE (XEXP (ind
, 0)) == PLUS
12008 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12009 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12012 /* Match: (mem (reg)). */
12014 return arm_address_register_rtx_p (ind
, 0);
12016 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12017 if (GET_CODE (ind
) == POST_INC
12018 || GET_CODE (ind
) == PRE_DEC
)
12019 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12024 /* Return true if X is a register that will be eliminated later on. */
12026 arm_eliminable_register (rtx x
)
12028 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12029 || REGNO (x
) == ARG_POINTER_REGNUM
12030 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12031 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12034 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12035 coprocessor registers. Otherwise return NO_REGS. */
12038 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12040 if (mode
== HFmode
)
12042 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12043 return GENERAL_REGS
;
12044 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12046 return GENERAL_REGS
;
12049 /* The neon move patterns handle all legitimate vector and struct
12052 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12053 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12054 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12055 || VALID_NEON_STRUCT_MODE (mode
)))
12058 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12061 return GENERAL_REGS
;
12064 /* Values which must be returned in the most-significant end of the return
12068 arm_return_in_msb (const_tree valtype
)
12070 return (TARGET_AAPCS_BASED
12071 && BYTES_BIG_ENDIAN
12072 && (AGGREGATE_TYPE_P (valtype
)
12073 || TREE_CODE (valtype
) == COMPLEX_TYPE
12074 || FIXED_POINT_TYPE_P (valtype
)));
12077 /* Return TRUE if X references a SYMBOL_REF. */
12079 symbol_mentioned_p (rtx x
)
12084 if (GET_CODE (x
) == SYMBOL_REF
)
12087 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12088 are constant offsets, not symbols. */
12089 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12092 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12094 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12100 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12101 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12104 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12111 /* Return TRUE if X references a LABEL_REF. */
12113 label_mentioned_p (rtx x
)
12118 if (GET_CODE (x
) == LABEL_REF
)
12121 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12122 instruction, but they are constant offsets, not symbols. */
12123 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12126 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12127 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12133 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12134 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12137 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12145 tls_mentioned_p (rtx x
)
12147 switch (GET_CODE (x
))
12150 return tls_mentioned_p (XEXP (x
, 0));
12153 if (XINT (x
, 1) == UNSPEC_TLS
)
12156 /* Fall through. */
12162 /* Must not copy any rtx that uses a pc-relative address.
12163 Also, disallow copying of load-exclusive instructions that
12164 may appear after splitting of compare-and-swap-style operations
12165 so as to prevent those loops from being transformed away from their
12166 canonical forms (see PR 69904). */
12169 arm_cannot_copy_insn_p (rtx_insn
*insn
)
12171 /* The tls call insn cannot be copied, as it is paired with a data
12173 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12176 subrtx_iterator::array_type array
;
12177 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
12179 const_rtx x
= *iter
;
12180 if (GET_CODE (x
) == UNSPEC
12181 && (XINT (x
, 1) == UNSPEC_PIC_BASE
12182 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
12186 rtx set
= single_set (insn
);
12189 rtx src
= SET_SRC (set
);
12190 if (GET_CODE (src
) == ZERO_EXTEND
)
12191 src
= XEXP (src
, 0);
12193 /* Catch the load-exclusive and load-acquire operations. */
12194 if (GET_CODE (src
) == UNSPEC_VOLATILE
12195 && (XINT (src
, 1) == VUNSPEC_LL
12196 || XINT (src
, 1) == VUNSPEC_LAX
))
12203 minmax_code (rtx x
)
12205 enum rtx_code code
= GET_CODE (x
);
12218 gcc_unreachable ();
12222 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12225 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12226 int *mask
, bool *signed_sat
)
12228 /* The high bound must be a power of two minus one. */
12229 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12233 /* The low bound is either zero (for usat) or one less than the
12234 negation of the high bound (for ssat). */
12235 if (INTVAL (lo_bound
) == 0)
12240 *signed_sat
= false;
12245 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12250 *signed_sat
= true;
12258 /* Return 1 if memory locations are adjacent. */
12260 adjacent_mem_locations (rtx a
, rtx b
)
12262 /* We don't guarantee to preserve the order of these memory refs. */
12263 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12266 if ((REG_P (XEXP (a
, 0))
12267 || (GET_CODE (XEXP (a
, 0)) == PLUS
12268 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12269 && (REG_P (XEXP (b
, 0))
12270 || (GET_CODE (XEXP (b
, 0)) == PLUS
12271 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12273 HOST_WIDE_INT val0
= 0, val1
= 0;
12277 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12279 reg0
= XEXP (XEXP (a
, 0), 0);
12280 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12283 reg0
= XEXP (a
, 0);
12285 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12287 reg1
= XEXP (XEXP (b
, 0), 0);
12288 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12291 reg1
= XEXP (b
, 0);
12293 /* Don't accept any offset that will require multiple
12294 instructions to handle, since this would cause the
12295 arith_adjacentmem pattern to output an overlong sequence. */
12296 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12299 /* Don't allow an eliminable register: register elimination can make
12300 the offset too large. */
12301 if (arm_eliminable_register (reg0
))
12304 val_diff
= val1
- val0
;
12308 /* If the target has load delay slots, then there's no benefit
12309 to using an ldm instruction unless the offset is zero and
12310 we are optimizing for size. */
12311 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12312 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12313 && (val_diff
== 4 || val_diff
== -4));
12316 return ((REGNO (reg0
) == REGNO (reg1
))
12317 && (val_diff
== 4 || val_diff
== -4));
12323 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12324 for load operations, false for store operations. CONSECUTIVE is true
12325 if the register numbers in the operation must be consecutive in the register
12326 bank. RETURN_PC is true if value is to be loaded in PC.
12327 The pattern we are trying to match for load is:
12328 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12329 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12332 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12335 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12336 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12337 3. If consecutive is TRUE, then for kth register being loaded,
12338 REGNO (R_dk) = REGNO (R_d0) + k.
12339 The pattern for store is similar. */
12341 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12342 bool consecutive
, bool return_pc
)
12344 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12345 rtx reg
, mem
, addr
;
12347 unsigned first_regno
;
12348 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12350 bool addr_reg_in_reglist
= false;
12351 bool update
= false;
12356 /* If not in SImode, then registers must be consecutive
12357 (e.g., VLDM instructions for DFmode). */
12358 gcc_assert ((mode
== SImode
) || consecutive
);
12359 /* Setting return_pc for stores is illegal. */
12360 gcc_assert (!return_pc
|| load
);
12362 /* Set up the increments and the regs per val based on the mode. */
12363 reg_increment
= GET_MODE_SIZE (mode
);
12364 regs_per_val
= reg_increment
/ 4;
12365 offset_adj
= return_pc
? 1 : 0;
12368 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12369 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12372 /* Check if this is a write-back. */
12373 elt
= XVECEXP (op
, 0, offset_adj
);
12374 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12380 /* The offset adjustment must be the number of registers being
12381 popped times the size of a single register. */
12382 if (!REG_P (SET_DEST (elt
))
12383 || !REG_P (XEXP (SET_SRC (elt
), 0))
12384 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12385 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12386 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12387 ((count
- 1 - offset_adj
) * reg_increment
))
12391 i
= i
+ offset_adj
;
12392 base
= base
+ offset_adj
;
12393 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12394 success depends on the type: VLDM can do just one reg,
12395 LDM must do at least two. */
12396 if ((count
<= i
) && (mode
== SImode
))
12399 elt
= XVECEXP (op
, 0, i
- 1);
12400 if (GET_CODE (elt
) != SET
)
12405 reg
= SET_DEST (elt
);
12406 mem
= SET_SRC (elt
);
12410 reg
= SET_SRC (elt
);
12411 mem
= SET_DEST (elt
);
12414 if (!REG_P (reg
) || !MEM_P (mem
))
12417 regno
= REGNO (reg
);
12418 first_regno
= regno
;
12419 addr
= XEXP (mem
, 0);
12420 if (GET_CODE (addr
) == PLUS
)
12422 if (!CONST_INT_P (XEXP (addr
, 1)))
12425 offset
= INTVAL (XEXP (addr
, 1));
12426 addr
= XEXP (addr
, 0);
12432 /* Don't allow SP to be loaded unless it is also the base register. It
12433 guarantees that SP is reset correctly when an LDM instruction
12434 is interrupted. Otherwise, we might end up with a corrupt stack. */
12435 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12438 for (; i
< count
; i
++)
12440 elt
= XVECEXP (op
, 0, i
);
12441 if (GET_CODE (elt
) != SET
)
12446 reg
= SET_DEST (elt
);
12447 mem
= SET_SRC (elt
);
12451 reg
= SET_SRC (elt
);
12452 mem
= SET_DEST (elt
);
12456 || GET_MODE (reg
) != mode
12457 || REGNO (reg
) <= regno
12460 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12461 /* Don't allow SP to be loaded unless it is also the base register. It
12462 guarantees that SP is reset correctly when an LDM instruction
12463 is interrupted. Otherwise, we might end up with a corrupt stack. */
12464 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12466 || GET_MODE (mem
) != mode
12467 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12468 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12469 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
12470 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
12471 offset
+ (i
- base
) * reg_increment
))
12472 && (!REG_P (XEXP (mem
, 0))
12473 || offset
+ (i
- base
) * reg_increment
!= 0)))
12476 regno
= REGNO (reg
);
12477 if (regno
== REGNO (addr
))
12478 addr_reg_in_reglist
= true;
12483 if (update
&& addr_reg_in_reglist
)
12486 /* For Thumb-1, address register is always modified - either by write-back
12487 or by explicit load. If the pattern does not describe an update,
12488 then the address register must be in the list of loaded registers. */
12490 return update
|| addr_reg_in_reglist
;
12496 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12497 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12498 instruction. ADD_OFFSET is nonzero if the base address register needs
12499 to be modified with an add instruction before we can use it. */
12502 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
12503 int nops
, HOST_WIDE_INT add_offset
)
12505 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12506 if the offset isn't small enough. The reason 2 ldrs are faster
12507 is because these ARMs are able to do more than one cache access
12508 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12509 whilst the ARM8 has a double bandwidth cache. This means that
12510 these cores can do both an instruction fetch and a data fetch in
12511 a single cycle, so the trick of calculating the address into a
12512 scratch register (one of the result regs) and then doing a load
12513 multiple actually becomes slower (and no smaller in code size).
12514 That is the transformation
12516 ldr rd1, [rbase + offset]
12517 ldr rd2, [rbase + offset + 4]
12521 add rd1, rbase, offset
12522 ldmia rd1, {rd1, rd2}
12524 produces worse code -- '3 cycles + any stalls on rd2' instead of
12525 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12526 access per cycle, the first sequence could never complete in less
12527 than 6 cycles, whereas the ldm sequence would only take 5 and
12528 would make better use of sequential accesses if not hitting the
12531 We cheat here and test 'arm_ld_sched' which we currently know to
12532 only be true for the ARM8, ARM9 and StrongARM. If this ever
12533 changes, then the test below needs to be reworked. */
12534 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
12537 /* XScale has load-store double instructions, but they have stricter
12538 alignment requirements than load-store multiple, so we cannot
12541 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12542 the pipeline until completion.
12550 An ldr instruction takes 1-3 cycles, but does not block the
12559 Best case ldr will always win. However, the more ldr instructions
12560 we issue, the less likely we are to be able to schedule them well.
12561 Using ldr instructions also increases code size.
12563 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12564 for counts of 3 or 4 regs. */
12565 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
12570 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12571 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12572 an array ORDER which describes the sequence to use when accessing the
12573 offsets that produces an ascending order. In this sequence, each
12574 offset must be larger by exactly 4 than the previous one. ORDER[0]
12575 must have been filled in with the lowest offset by the caller.
12576 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12577 we use to verify that ORDER produces an ascending order of registers.
12578 Return true if it was possible to construct such an order, false if
12582 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
12583 int *unsorted_regs
)
12586 for (i
= 1; i
< nops
; i
++)
12590 order
[i
] = order
[i
- 1];
12591 for (j
= 0; j
< nops
; j
++)
12592 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
12594 /* We must find exactly one offset that is higher than the
12595 previous one by 4. */
12596 if (order
[i
] != order
[i
- 1])
12600 if (order
[i
] == order
[i
- 1])
12602 /* The register numbers must be ascending. */
12603 if (unsorted_regs
!= NULL
12604 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
12610 /* Used to determine in a peephole whether a sequence of load
12611 instructions can be changed into a load-multiple instruction.
12612 NOPS is the number of separate load instructions we are examining. The
12613 first NOPS entries in OPERANDS are the destination registers, the
12614 next NOPS entries are memory operands. If this function is
12615 successful, *BASE is set to the common base register of the memory
12616 accesses; *LOAD_OFFSET is set to the first memory location's offset
12617 from that base register.
12618 REGS is an array filled in with the destination register numbers.
12619 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
12620 insn numbers to an ascending order of stores. If CHECK_REGS is true,
12621 the sequence of registers in REGS matches the loads from ascending memory
12622 locations, and the function verifies that the register numbers are
12623 themselves ascending. If CHECK_REGS is false, the register numbers
12624 are stored in the order they are found in the operands. */
12626 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
12627 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
12629 int unsorted_regs
[MAX_LDM_STM_OPS
];
12630 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
12631 int order
[MAX_LDM_STM_OPS
];
12632 rtx base_reg_rtx
= NULL
;
12636 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12637 easily extended if required. */
12638 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
12640 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
12642 /* Loop over the operands and check that the memory references are
12643 suitable (i.e. immediate offsets from the same base register). At
12644 the same time, extract the target register, and the memory
12646 for (i
= 0; i
< nops
; i
++)
12651 /* Convert a subreg of a mem into the mem itself. */
12652 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
12653 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
12655 gcc_assert (MEM_P (operands
[nops
+ i
]));
12657 /* Don't reorder volatile memory references; it doesn't seem worth
12658 looking for the case where the order is ok anyway. */
12659 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
12662 offset
= const0_rtx
;
12664 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
12665 || (GET_CODE (reg
) == SUBREG
12666 && REG_P (reg
= SUBREG_REG (reg
))))
12667 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
12668 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
12669 || (GET_CODE (reg
) == SUBREG
12670 && REG_P (reg
= SUBREG_REG (reg
))))
12671 && (CONST_INT_P (offset
12672 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
12676 base_reg
= REGNO (reg
);
12677 base_reg_rtx
= reg
;
12678 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
12681 else if (base_reg
!= (int) REGNO (reg
))
12682 /* Not addressed from the same base register. */
12685 unsorted_regs
[i
] = (REG_P (operands
[i
])
12686 ? REGNO (operands
[i
])
12687 : REGNO (SUBREG_REG (operands
[i
])));
12689 /* If it isn't an integer register, or if it overwrites the
12690 base register but isn't the last insn in the list, then
12691 we can't do this. */
12692 if (unsorted_regs
[i
] < 0
12693 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
12694 || unsorted_regs
[i
] > 14
12695 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
12698 /* Don't allow SP to be loaded unless it is also the base
12699 register. It guarantees that SP is reset correctly when
12700 an LDM instruction is interrupted. Otherwise, we might
12701 end up with a corrupt stack. */
12702 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
12705 unsorted_offsets
[i
] = INTVAL (offset
);
12706 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
12710 /* Not a suitable memory address. */
12714 /* All the useful information has now been extracted from the
12715 operands into unsorted_regs and unsorted_offsets; additionally,
12716 order[0] has been set to the lowest offset in the list. Sort
12717 the offsets into order, verifying that they are adjacent, and
12718 check that the register numbers are ascending. */
12719 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
12720 check_regs
? unsorted_regs
: NULL
))
12724 memcpy (saved_order
, order
, sizeof order
);
12730 for (i
= 0; i
< nops
; i
++)
12731 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
12733 *load_offset
= unsorted_offsets
[order
[0]];
12737 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
12740 if (unsorted_offsets
[order
[0]] == 0)
12741 ldm_case
= 1; /* ldmia */
12742 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
12743 ldm_case
= 2; /* ldmib */
12744 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
12745 ldm_case
= 3; /* ldmda */
12746 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
12747 ldm_case
= 4; /* ldmdb */
12748 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
12749 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
12754 if (!multiple_operation_profitable_p (false, nops
,
12756 ? unsorted_offsets
[order
[0]] : 0))
12762 /* Used to determine in a peephole whether a sequence of store instructions can
12763 be changed into a store-multiple instruction.
12764 NOPS is the number of separate store instructions we are examining.
12765 NOPS_TOTAL is the total number of instructions recognized by the peephole
12767 The first NOPS entries in OPERANDS are the source registers, the next
12768 NOPS entries are memory operands. If this function is successful, *BASE is
12769 set to the common base register of the memory accesses; *LOAD_OFFSET is set
12770 to the first memory location's offset from that base register. REGS is an
12771 array filled in with the source register numbers, REG_RTXS (if nonnull) is
12772 likewise filled with the corresponding rtx's.
12773 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
12774 numbers to an ascending order of stores.
12775 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
12776 from ascending memory locations, and the function verifies that the register
12777 numbers are themselves ascending. If CHECK_REGS is false, the register
12778 numbers are stored in the order they are found in the operands. */
12780 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
12781 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
12782 HOST_WIDE_INT
*load_offset
, bool check_regs
)
12784 int unsorted_regs
[MAX_LDM_STM_OPS
];
12785 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
12786 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
12787 int order
[MAX_LDM_STM_OPS
];
12789 rtx base_reg_rtx
= NULL
;
12792 /* Write back of base register is currently only supported for Thumb 1. */
12793 int base_writeback
= TARGET_THUMB1
;
12795 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12796 easily extended if required. */
12797 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
12799 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
12801 /* Loop over the operands and check that the memory references are
12802 suitable (i.e. immediate offsets from the same base register). At
12803 the same time, extract the target register, and the memory
12805 for (i
= 0; i
< nops
; i
++)
12810 /* Convert a subreg of a mem into the mem itself. */
12811 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
12812 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
12814 gcc_assert (MEM_P (operands
[nops
+ i
]));
12816 /* Don't reorder volatile memory references; it doesn't seem worth
12817 looking for the case where the order is ok anyway. */
12818 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
12821 offset
= const0_rtx
;
12823 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
12824 || (GET_CODE (reg
) == SUBREG
12825 && REG_P (reg
= SUBREG_REG (reg
))))
12826 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
12827 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
12828 || (GET_CODE (reg
) == SUBREG
12829 && REG_P (reg
= SUBREG_REG (reg
))))
12830 && (CONST_INT_P (offset
12831 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
12833 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
12834 ? operands
[i
] : SUBREG_REG (operands
[i
]));
12835 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
12839 base_reg
= REGNO (reg
);
12840 base_reg_rtx
= reg
;
12841 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
12844 else if (base_reg
!= (int) REGNO (reg
))
12845 /* Not addressed from the same base register. */
12848 /* If it isn't an integer register, then we can't do this. */
12849 if (unsorted_regs
[i
] < 0
12850 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
12851 /* The effects are unpredictable if the base register is
12852 both updated and stored. */
12853 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
12854 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
12855 || unsorted_regs
[i
] > 14)
12858 unsorted_offsets
[i
] = INTVAL (offset
);
12859 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
12863 /* Not a suitable memory address. */
12867 /* All the useful information has now been extracted from the
12868 operands into unsorted_regs and unsorted_offsets; additionally,
12869 order[0] has been set to the lowest offset in the list. Sort
12870 the offsets into order, verifying that they are adjacent, and
12871 check that the register numbers are ascending. */
12872 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
12873 check_regs
? unsorted_regs
: NULL
))
12877 memcpy (saved_order
, order
, sizeof order
);
12883 for (i
= 0; i
< nops
; i
++)
12885 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
12887 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
12890 *load_offset
= unsorted_offsets
[order
[0]];
12894 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
12897 if (unsorted_offsets
[order
[0]] == 0)
12898 stm_case
= 1; /* stmia */
12899 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
12900 stm_case
= 2; /* stmib */
12901 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
12902 stm_case
= 3; /* stmda */
12903 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
12904 stm_case
= 4; /* stmdb */
12908 if (!multiple_operation_profitable_p (false, nops
, 0))
12914 /* Routines for use in generating RTL. */
12916 /* Generate a load-multiple instruction. COUNT is the number of loads in
12917 the instruction; REGS and MEMS are arrays containing the operands.
12918 BASEREG is the base register to be used in addressing the memory operands.
12919 WBACK_OFFSET is nonzero if the instruction should update the base
12923 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
12924 HOST_WIDE_INT wback_offset
)
12929 if (!multiple_operation_profitable_p (false, count
, 0))
12935 for (i
= 0; i
< count
; i
++)
12936 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
12938 if (wback_offset
!= 0)
12939 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
12941 seq
= get_insns ();
12947 result
= gen_rtx_PARALLEL (VOIDmode
,
12948 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
12949 if (wback_offset
!= 0)
12951 XVECEXP (result
, 0, 0)
12952 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
12957 for (j
= 0; i
< count
; i
++, j
++)
12958 XVECEXP (result
, 0, i
)
12959 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
12964 /* Generate a store-multiple instruction. COUNT is the number of stores in
12965 the instruction; REGS and MEMS are arrays containing the operands.
12966 BASEREG is the base register to be used in addressing the memory operands.
12967 WBACK_OFFSET is nonzero if the instruction should update the base
12971 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
12972 HOST_WIDE_INT wback_offset
)
12977 if (GET_CODE (basereg
) == PLUS
)
12978 basereg
= XEXP (basereg
, 0);
12980 if (!multiple_operation_profitable_p (false, count
, 0))
12986 for (i
= 0; i
< count
; i
++)
12987 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
12989 if (wback_offset
!= 0)
12990 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
12992 seq
= get_insns ();
12998 result
= gen_rtx_PARALLEL (VOIDmode
,
12999 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13000 if (wback_offset
!= 0)
13002 XVECEXP (result
, 0, 0)
13003 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13008 for (j
= 0; i
< count
; i
++, j
++)
13009 XVECEXP (result
, 0, i
)
13010 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13015 /* Generate either a load-multiple or a store-multiple instruction. This
13016 function can be used in situations where we can start with a single MEM
13017 rtx and adjust its address upwards.
13018 COUNT is the number of operations in the instruction, not counting a
13019 possible update of the base register. REGS is an array containing the
13021 BASEREG is the base register to be used in addressing the memory operands,
13022 which are constructed from BASEMEM.
13023 WRITE_BACK specifies whether the generated instruction should include an
13024 update of the base register.
13025 OFFSETP is used to pass an offset to and from this function; this offset
13026 is not used when constructing the address (instead BASEMEM should have an
13027 appropriate offset in its address), it is used only for setting
13028 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13031 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13032 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13034 rtx mems
[MAX_LDM_STM_OPS
];
13035 HOST_WIDE_INT offset
= *offsetp
;
13038 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13040 if (GET_CODE (basereg
) == PLUS
)
13041 basereg
= XEXP (basereg
, 0);
13043 for (i
= 0; i
< count
; i
++)
13045 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13046 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13054 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13055 write_back
? 4 * count
: 0);
13057 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13058 write_back
? 4 * count
: 0);
13062 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13063 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13065 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13070 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13071 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13073 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13077 /* Called from a peephole2 expander to turn a sequence of loads into an
13078 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13079 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13080 is true if we can reorder the registers because they are used commutatively
13082 Returns true iff we could generate a new instruction. */
13085 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13087 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13088 rtx mems
[MAX_LDM_STM_OPS
];
13089 int i
, j
, base_reg
;
13091 HOST_WIDE_INT offset
;
13092 int write_back
= FALSE
;
13096 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13097 &base_reg
, &offset
, !sort_regs
);
13103 for (i
= 0; i
< nops
- 1; i
++)
13104 for (j
= i
+ 1; j
< nops
; j
++)
13105 if (regs
[i
] > regs
[j
])
13111 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13115 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13116 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13122 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13123 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13125 if (!TARGET_THUMB1
)
13127 base_reg
= regs
[0];
13128 base_reg_rtx
= newbase
;
13132 for (i
= 0; i
< nops
; i
++)
13134 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13135 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13138 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13139 write_back
? offset
+ i
* 4 : 0));
13143 /* Called from a peephole2 expander to turn a sequence of stores into an
13144 STM instruction. OPERANDS are the operands found by the peephole matcher;
13145 NOPS indicates how many separate stores we are trying to combine.
13146 Returns true iff we could generate a new instruction. */
13149 gen_stm_seq (rtx
*operands
, int nops
)
13152 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13153 rtx mems
[MAX_LDM_STM_OPS
];
13156 HOST_WIDE_INT offset
;
13157 int write_back
= FALSE
;
13160 bool base_reg_dies
;
13162 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13163 mem_order
, &base_reg
, &offset
, true);
13168 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13170 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13173 gcc_assert (base_reg_dies
);
13179 gcc_assert (base_reg_dies
);
13180 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13184 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13186 for (i
= 0; i
< nops
; i
++)
13188 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13189 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13192 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13193 write_back
? offset
+ i
* 4 : 0));
13197 /* Called from a peephole2 expander to turn a sequence of stores that are
13198 preceded by constant loads into an STM instruction. OPERANDS are the
13199 operands found by the peephole matcher; NOPS indicates how many
13200 separate stores we are trying to combine; there are 2 * NOPS
13201 instructions in the peephole.
13202 Returns true iff we could generate a new instruction. */
13205 gen_const_stm_seq (rtx
*operands
, int nops
)
13207 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13208 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13209 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13210 rtx mems
[MAX_LDM_STM_OPS
];
13213 HOST_WIDE_INT offset
;
13214 int write_back
= FALSE
;
13217 bool base_reg_dies
;
13219 HARD_REG_SET allocated
;
13221 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13222 mem_order
, &base_reg
, &offset
, false);
13227 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13229 /* If the same register is used more than once, try to find a free
13231 CLEAR_HARD_REG_SET (allocated
);
13232 for (i
= 0; i
< nops
; i
++)
13234 for (j
= i
+ 1; j
< nops
; j
++)
13235 if (regs
[i
] == regs
[j
])
13237 rtx t
= peep2_find_free_register (0, nops
* 2,
13238 TARGET_THUMB1
? "l" : "r",
13239 SImode
, &allocated
);
13243 regs
[i
] = REGNO (t
);
13247 /* Compute an ordering that maps the register numbers to an ascending
13250 for (i
= 0; i
< nops
; i
++)
13251 if (regs
[i
] < regs
[reg_order
[0]])
13254 for (i
= 1; i
< nops
; i
++)
13256 int this_order
= reg_order
[i
- 1];
13257 for (j
= 0; j
< nops
; j
++)
13258 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13259 && (this_order
== reg_order
[i
- 1]
13260 || regs
[j
] < regs
[this_order
]))
13262 reg_order
[i
] = this_order
;
13265 /* Ensure that registers that must be live after the instruction end
13266 up with the correct value. */
13267 for (i
= 0; i
< nops
; i
++)
13269 int this_order
= reg_order
[i
];
13270 if ((this_order
!= mem_order
[i
]
13271 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13272 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13276 /* Load the constants. */
13277 for (i
= 0; i
< nops
; i
++)
13279 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13280 sorted_regs
[i
] = regs
[reg_order
[i
]];
13281 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13284 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13286 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13289 gcc_assert (base_reg_dies
);
13295 gcc_assert (base_reg_dies
);
13296 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13300 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13302 for (i
= 0; i
< nops
; i
++)
13304 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13305 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13308 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13309 write_back
? offset
+ i
* 4 : 0));
13313 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13314 unaligned copies on processors which support unaligned semantics for those
13315 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13316 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13317 An interleave factor of 1 (the minimum) will perform no interleaving.
13318 Load/store multiple are used for aligned addresses where possible. */
13321 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13322 HOST_WIDE_INT length
,
13323 unsigned int interleave_factor
)
13325 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13326 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13327 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13328 HOST_WIDE_INT i
, j
;
13329 HOST_WIDE_INT remaining
= length
, words
;
13330 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13332 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13333 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13334 HOST_WIDE_INT srcoffset
, dstoffset
;
13335 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13338 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13340 /* Use hard registers if we have aligned source or destination so we can use
13341 load/store multiple with contiguous registers. */
13342 if (dst_aligned
|| src_aligned
)
13343 for (i
= 0; i
< interleave_factor
; i
++)
13344 regs
[i
] = gen_rtx_REG (SImode
, i
);
13346 for (i
= 0; i
< interleave_factor
; i
++)
13347 regs
[i
] = gen_reg_rtx (SImode
);
13349 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13350 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13352 srcoffset
= dstoffset
= 0;
13354 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13355 For copying the last bytes we want to subtract this offset again. */
13356 src_autoinc
= dst_autoinc
= 0;
13358 for (i
= 0; i
< interleave_factor
; i
++)
13361 /* Copy BLOCK_SIZE_BYTES chunks. */
13363 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13366 if (src_aligned
&& interleave_factor
> 1)
13368 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13369 TRUE
, srcbase
, &srcoffset
));
13370 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13374 for (j
= 0; j
< interleave_factor
; j
++)
13376 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13378 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13379 srcoffset
+ j
* UNITS_PER_WORD
);
13380 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13382 srcoffset
+= block_size_bytes
;
13386 if (dst_aligned
&& interleave_factor
> 1)
13388 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13389 TRUE
, dstbase
, &dstoffset
));
13390 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13394 for (j
= 0; j
< interleave_factor
; j
++)
13396 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13398 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13399 dstoffset
+ j
* UNITS_PER_WORD
);
13400 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13402 dstoffset
+= block_size_bytes
;
13405 remaining
-= block_size_bytes
;
13408 /* Copy any whole words left (note these aren't interleaved with any
13409 subsequent halfword/byte load/stores in the interests of simplicity). */
13411 words
= remaining
/ UNITS_PER_WORD
;
13413 gcc_assert (words
< interleave_factor
);
13415 if (src_aligned
&& words
> 1)
13417 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13419 src_autoinc
+= UNITS_PER_WORD
* words
;
13423 for (j
= 0; j
< words
; j
++)
13425 addr
= plus_constant (Pmode
, src
,
13426 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13427 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13428 srcoffset
+ j
* UNITS_PER_WORD
);
13430 emit_move_insn (regs
[j
], mem
);
13432 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13434 srcoffset
+= words
* UNITS_PER_WORD
;
13437 if (dst_aligned
&& words
> 1)
13439 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
13441 dst_autoinc
+= words
* UNITS_PER_WORD
;
13445 for (j
= 0; j
< words
; j
++)
13447 addr
= plus_constant (Pmode
, dst
,
13448 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
13449 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13450 dstoffset
+ j
* UNITS_PER_WORD
);
13452 emit_move_insn (mem
, regs
[j
]);
13454 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13456 dstoffset
+= words
* UNITS_PER_WORD
;
13459 remaining
-= words
* UNITS_PER_WORD
;
13461 gcc_assert (remaining
< 4);
13463 /* Copy a halfword if necessary. */
13465 if (remaining
>= 2)
13467 halfword_tmp
= gen_reg_rtx (SImode
);
13469 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13470 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
13471 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
13473 /* Either write out immediately, or delay until we've loaded the last
13474 byte, depending on interleave factor. */
13475 if (interleave_factor
== 1)
13477 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13478 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13479 emit_insn (gen_unaligned_storehi (mem
,
13480 gen_lowpart (HImode
, halfword_tmp
)));
13481 halfword_tmp
= NULL
;
13489 gcc_assert (remaining
< 2);
13491 /* Copy last byte. */
13493 if ((remaining
& 1) != 0)
13495 byte_tmp
= gen_reg_rtx (SImode
);
13497 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13498 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
13499 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
13501 if (interleave_factor
== 1)
13503 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13504 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13505 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13514 /* Store last halfword if we haven't done so already. */
13518 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13519 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13520 emit_insn (gen_unaligned_storehi (mem
,
13521 gen_lowpart (HImode
, halfword_tmp
)));
13525 /* Likewise for last byte. */
13529 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13530 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13531 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13535 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
13538 /* From mips_adjust_block_mem:
13540 Helper function for doing a loop-based block operation on memory
13541 reference MEM. Each iteration of the loop will operate on LENGTH
13544 Create a new base register for use within the loop and point it to
13545 the start of MEM. Create a new memory reference that uses this
13546 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13549 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
13552 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
13554 /* Although the new mem does not refer to a known location,
13555 it does keep up to LENGTH bytes of alignment. */
13556 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
13557 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
13560 /* From mips_block_move_loop:
13562 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13563 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13564 the memory regions do not overlap. */
13567 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
13568 unsigned int interleave_factor
,
13569 HOST_WIDE_INT bytes_per_iter
)
13571 rtx src_reg
, dest_reg
, final_src
, test
;
13572 HOST_WIDE_INT leftover
;
13574 leftover
= length
% bytes_per_iter
;
13575 length
-= leftover
;
13577 /* Create registers and memory references for use within the loop. */
13578 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
13579 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
13581 /* Calculate the value that SRC_REG should have after the last iteration of
13583 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
13584 0, 0, OPTAB_WIDEN
);
13586 /* Emit the start of the loop. */
13587 rtx_code_label
*label
= gen_label_rtx ();
13588 emit_label (label
);
13590 /* Emit the loop body. */
13591 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
13592 interleave_factor
);
13594 /* Move on to the next block. */
13595 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
13596 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
13598 /* Emit the loop condition. */
13599 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
13600 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
13602 /* Mop up any left-over bytes. */
13604 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
13607 /* Emit a block move when either the source or destination is unaligned (not
13608 aligned to a four-byte boundary). This may need further tuning depending on
13609 core type, optimize_size setting, etc. */
13612 arm_movmemqi_unaligned (rtx
*operands
)
13614 HOST_WIDE_INT length
= INTVAL (operands
[2]);
13618 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
13619 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
13620 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
13621 size of code if optimizing for size. We'll use ldm/stm if src_aligned
13622 or dst_aligned though: allow more interleaving in those cases since the
13623 resulting code can be smaller. */
13624 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
13625 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
13628 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
13629 interleave_factor
, bytes_per_iter
);
13631 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
13632 interleave_factor
);
13636 /* Note that the loop created by arm_block_move_unaligned_loop may be
13637 subject to loop unrolling, which makes tuning this condition a little
13640 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
13642 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
13649 arm_gen_movmemqi (rtx
*operands
)
13651 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
13652 HOST_WIDE_INT srcoffset
, dstoffset
;
13654 rtx src
, dst
, srcbase
, dstbase
;
13655 rtx part_bytes_reg
= NULL
;
13658 if (!CONST_INT_P (operands
[2])
13659 || !CONST_INT_P (operands
[3])
13660 || INTVAL (operands
[2]) > 64)
13663 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
13664 return arm_movmemqi_unaligned (operands
);
13666 if (INTVAL (operands
[3]) & 3)
13669 dstbase
= operands
[0];
13670 srcbase
= operands
[1];
13672 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
13673 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
13675 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
13676 out_words_to_go
= INTVAL (operands
[2]) / 4;
13677 last_bytes
= INTVAL (operands
[2]) & 3;
13678 dstoffset
= srcoffset
= 0;
13680 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
13681 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
13683 for (i
= 0; in_words_to_go
>= 2; i
+=4)
13685 if (in_words_to_go
> 4)
13686 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
13687 TRUE
, srcbase
, &srcoffset
));
13689 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
13690 src
, FALSE
, srcbase
,
13693 if (out_words_to_go
)
13695 if (out_words_to_go
> 4)
13696 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
13697 TRUE
, dstbase
, &dstoffset
));
13698 else if (out_words_to_go
!= 1)
13699 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
13700 out_words_to_go
, dst
,
13703 dstbase
, &dstoffset
));
13706 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
13707 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
13708 if (last_bytes
!= 0)
13710 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
13716 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
13717 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
13720 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
13721 if (out_words_to_go
)
13725 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
13726 sreg
= copy_to_reg (mem
);
13728 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
13729 emit_move_insn (mem
, sreg
);
13732 gcc_assert (!in_words_to_go
); /* Sanity check */
13735 if (in_words_to_go
)
13737 gcc_assert (in_words_to_go
> 0);
13739 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
13740 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
13743 gcc_assert (!last_bytes
|| part_bytes_reg
);
13745 if (BYTES_BIG_ENDIAN
&& last_bytes
)
13747 rtx tmp
= gen_reg_rtx (SImode
);
13749 /* The bytes we want are in the top end of the word. */
13750 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
13751 GEN_INT (8 * (4 - last_bytes
))));
13752 part_bytes_reg
= tmp
;
13756 mem
= adjust_automodify_address (dstbase
, QImode
,
13757 plus_constant (Pmode
, dst
,
13759 dstoffset
+ last_bytes
- 1);
13760 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
13764 tmp
= gen_reg_rtx (SImode
);
13765 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
13766 part_bytes_reg
= tmp
;
13773 if (last_bytes
> 1)
13775 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
13776 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
13780 rtx tmp
= gen_reg_rtx (SImode
);
13781 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
13782 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
13783 part_bytes_reg
= tmp
;
13790 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
13791 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
13798 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
13801 next_consecutive_mem (rtx mem
)
13803 machine_mode mode
= GET_MODE (mem
);
13804 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
13805 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
13807 return adjust_automodify_address (mem
, mode
, addr
, offset
);
13810 /* Copy using LDRD/STRD instructions whenever possible.
13811 Returns true upon success. */
13813 gen_movmem_ldrd_strd (rtx
*operands
)
13815 unsigned HOST_WIDE_INT len
;
13816 HOST_WIDE_INT align
;
13817 rtx src
, dst
, base
;
13819 bool src_aligned
, dst_aligned
;
13820 bool src_volatile
, dst_volatile
;
13822 gcc_assert (CONST_INT_P (operands
[2]));
13823 gcc_assert (CONST_INT_P (operands
[3]));
13825 len
= UINTVAL (operands
[2]);
13829 /* Maximum alignment we can assume for both src and dst buffers. */
13830 align
= INTVAL (operands
[3]);
13832 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
13835 /* Place src and dst addresses in registers
13836 and update the corresponding mem rtx. */
13838 dst_volatile
= MEM_VOLATILE_P (dst
);
13839 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
13840 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
13841 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
13844 src_volatile
= MEM_VOLATILE_P (src
);
13845 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
13846 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
13847 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
13849 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
13852 if (src_volatile
|| dst_volatile
)
13855 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
13856 if (!(dst_aligned
|| src_aligned
))
13857 return arm_gen_movmemqi (operands
);
13859 /* If the either src or dst is unaligned we'll be accessing it as pairs
13860 of unaligned SImode accesses. Otherwise we can generate DImode
13861 ldrd/strd instructions. */
13862 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
13863 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
13868 reg0
= gen_reg_rtx (DImode
);
13869 rtx low_reg
= NULL_RTX
;
13870 rtx hi_reg
= NULL_RTX
;
13872 if (!src_aligned
|| !dst_aligned
)
13874 low_reg
= gen_lowpart (SImode
, reg0
);
13875 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
13878 emit_move_insn (reg0
, src
);
13881 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
13882 src
= next_consecutive_mem (src
);
13883 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
13887 emit_move_insn (dst
, reg0
);
13890 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
13891 dst
= next_consecutive_mem (dst
);
13892 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
13895 src
= next_consecutive_mem (src
);
13896 dst
= next_consecutive_mem (dst
);
13899 gcc_assert (len
< 8);
13902 /* More than a word but less than a double-word to copy. Copy a word. */
13903 reg0
= gen_reg_rtx (SImode
);
13904 src
= adjust_address (src
, SImode
, 0);
13905 dst
= adjust_address (dst
, SImode
, 0);
13907 emit_move_insn (reg0
, src
);
13909 emit_insn (gen_unaligned_loadsi (reg0
, src
));
13912 emit_move_insn (dst
, reg0
);
13914 emit_insn (gen_unaligned_storesi (dst
, reg0
));
13916 src
= next_consecutive_mem (src
);
13917 dst
= next_consecutive_mem (dst
);
13924 /* Copy the remaining bytes. */
13927 dst
= adjust_address (dst
, HImode
, 0);
13928 src
= adjust_address (src
, HImode
, 0);
13929 reg0
= gen_reg_rtx (SImode
);
13931 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
13933 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
13936 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
13938 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
13940 src
= next_consecutive_mem (src
);
13941 dst
= next_consecutive_mem (dst
);
13946 dst
= adjust_address (dst
, QImode
, 0);
13947 src
= adjust_address (src
, QImode
, 0);
13948 reg0
= gen_reg_rtx (QImode
);
13949 emit_move_insn (reg0
, src
);
13950 emit_move_insn (dst
, reg0
);
13954 /* Select a dominance comparison mode if possible for a test of the general
13955 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
13956 COND_OR == DOM_CC_X_AND_Y => (X && Y)
13957 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
13958 COND_OR == DOM_CC_X_OR_Y => (X || Y)
13959 In all cases OP will be either EQ or NE, but we don't need to know which
13960 here. If we are unable to support a dominance comparison we return
13961 CC mode. This will then fail to match for the RTL expressions that
13962 generate this call. */
13964 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
13966 enum rtx_code cond1
, cond2
;
13969 /* Currently we will probably get the wrong result if the individual
13970 comparisons are not simple. This also ensures that it is safe to
13971 reverse a comparison if necessary. */
13972 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
13974 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
13978 /* The if_then_else variant of this tests the second condition if the
13979 first passes, but is true if the first fails. Reverse the first
13980 condition to get a true "inclusive-or" expression. */
13981 if (cond_or
== DOM_CC_NX_OR_Y
)
13982 cond1
= reverse_condition (cond1
);
13984 /* If the comparisons are not equal, and one doesn't dominate the other,
13985 then we can't do this. */
13987 && !comparison_dominates_p (cond1
, cond2
)
13988 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
13992 std::swap (cond1
, cond2
);
13997 if (cond_or
== DOM_CC_X_AND_Y
)
14002 case EQ
: return CC_DEQmode
;
14003 case LE
: return CC_DLEmode
;
14004 case LEU
: return CC_DLEUmode
;
14005 case GE
: return CC_DGEmode
;
14006 case GEU
: return CC_DGEUmode
;
14007 default: gcc_unreachable ();
14011 if (cond_or
== DOM_CC_X_AND_Y
)
14023 gcc_unreachable ();
14027 if (cond_or
== DOM_CC_X_AND_Y
)
14039 gcc_unreachable ();
14043 if (cond_or
== DOM_CC_X_AND_Y
)
14044 return CC_DLTUmode
;
14049 return CC_DLTUmode
;
14051 return CC_DLEUmode
;
14055 gcc_unreachable ();
14059 if (cond_or
== DOM_CC_X_AND_Y
)
14060 return CC_DGTUmode
;
14065 return CC_DGTUmode
;
14067 return CC_DGEUmode
;
14071 gcc_unreachable ();
14074 /* The remaining cases only occur when both comparisons are the
14077 gcc_assert (cond1
== cond2
);
14081 gcc_assert (cond1
== cond2
);
14085 gcc_assert (cond1
== cond2
);
14089 gcc_assert (cond1
== cond2
);
14090 return CC_DLEUmode
;
14093 gcc_assert (cond1
== cond2
);
14094 return CC_DGEUmode
;
14097 gcc_unreachable ();
14102 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14104 /* All floating point compares return CCFP if it is an equality
14105 comparison, and CCFPE otherwise. */
14106 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14129 gcc_unreachable ();
14133 /* A compare with a shifted operand. Because of canonicalization, the
14134 comparison will have to be swapped when we emit the assembler. */
14135 if (GET_MODE (y
) == SImode
14136 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14137 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14138 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14139 || GET_CODE (x
) == ROTATERT
))
14142 /* This operation is performed swapped, but since we only rely on the Z
14143 flag we don't need an additional mode. */
14144 if (GET_MODE (y
) == SImode
14145 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14146 && GET_CODE (x
) == NEG
14147 && (op
== EQ
|| op
== NE
))
14150 /* This is a special case that is used by combine to allow a
14151 comparison of a shifted byte load to be split into a zero-extend
14152 followed by a comparison of the shifted integer (only valid for
14153 equalities and unsigned inequalities). */
14154 if (GET_MODE (x
) == SImode
14155 && GET_CODE (x
) == ASHIFT
14156 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14157 && GET_CODE (XEXP (x
, 0)) == SUBREG
14158 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14159 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14160 && (op
== EQ
|| op
== NE
14161 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14162 && CONST_INT_P (y
))
14165 /* A construct for a conditional compare, if the false arm contains
14166 0, then both conditions must be true, otherwise either condition
14167 must be true. Not all conditions are possible, so CCmode is
14168 returned if it can't be done. */
14169 if (GET_CODE (x
) == IF_THEN_ELSE
14170 && (XEXP (x
, 2) == const0_rtx
14171 || XEXP (x
, 2) == const1_rtx
)
14172 && COMPARISON_P (XEXP (x
, 0))
14173 && COMPARISON_P (XEXP (x
, 1)))
14174 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14175 INTVAL (XEXP (x
, 2)));
14177 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14178 if (GET_CODE (x
) == AND
14179 && (op
== EQ
|| op
== NE
)
14180 && COMPARISON_P (XEXP (x
, 0))
14181 && COMPARISON_P (XEXP (x
, 1)))
14182 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14185 if (GET_CODE (x
) == IOR
14186 && (op
== EQ
|| op
== NE
)
14187 && COMPARISON_P (XEXP (x
, 0))
14188 && COMPARISON_P (XEXP (x
, 1)))
14189 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14192 /* An operation (on Thumb) where we want to test for a single bit.
14193 This is done by shifting that bit up into the top bit of a
14194 scratch register; we can then branch on the sign bit. */
14196 && GET_MODE (x
) == SImode
14197 && (op
== EQ
|| op
== NE
)
14198 && GET_CODE (x
) == ZERO_EXTRACT
14199 && XEXP (x
, 1) == const1_rtx
)
14202 /* An operation that sets the condition codes as a side-effect, the
14203 V flag is not set correctly, so we can only use comparisons where
14204 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14206 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14207 if (GET_MODE (x
) == SImode
14209 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14210 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14211 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14212 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14213 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14214 || GET_CODE (x
) == LSHIFTRT
14215 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14216 || GET_CODE (x
) == ROTATERT
14217 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14218 return CC_NOOVmode
;
14220 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14223 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14224 && GET_CODE (x
) == PLUS
14225 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14228 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14234 /* A DImode comparison against zero can be implemented by
14235 or'ing the two halves together. */
14236 if (y
== const0_rtx
)
14239 /* We can do an equality test in three Thumb instructions. */
14249 /* DImode unsigned comparisons can be implemented by cmp +
14250 cmpeq without a scratch register. Not worth doing in
14261 /* DImode signed and unsigned comparisons can be implemented
14262 by cmp + sbcs with a scratch register, but that does not
14263 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14264 gcc_assert (op
!= EQ
&& op
!= NE
);
14268 gcc_unreachable ();
14272 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14273 return GET_MODE (x
);
14278 /* X and Y are two things to compare using CODE. Emit the compare insn and
14279 return the rtx for register 0 in the proper mode. FP means this is a
14280 floating point compare: I don't think that it is needed on the arm. */
14282 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14286 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14288 /* We might have X as a constant, Y as a register because of the predicates
14289 used for cmpdi. If so, force X to a register here. */
14290 if (dimode_comparison
&& !REG_P (x
))
14291 x
= force_reg (DImode
, x
);
14293 mode
= SELECT_CC_MODE (code
, x
, y
);
14294 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14296 if (dimode_comparison
14297 && mode
!= CC_CZmode
)
14301 /* To compare two non-zero values for equality, XOR them and
14302 then compare against zero. Not used for ARM mode; there
14303 CC_CZmode is cheaper. */
14304 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14306 gcc_assert (!reload_completed
);
14307 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14311 /* A scratch register is required. */
14312 if (reload_completed
)
14313 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14315 scratch
= gen_rtx_SCRATCH (SImode
);
14317 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14318 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14319 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
14322 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14327 /* Generate a sequence of insns that will generate the correct return
14328 address mask depending on the physical architecture that the program
14331 arm_gen_return_addr_mask (void)
14333 rtx reg
= gen_reg_rtx (Pmode
);
14335 emit_insn (gen_return_addr_mask (reg
));
14340 arm_reload_in_hi (rtx
*operands
)
14342 rtx ref
= operands
[1];
14344 HOST_WIDE_INT offset
= 0;
14346 if (GET_CODE (ref
) == SUBREG
)
14348 offset
= SUBREG_BYTE (ref
);
14349 ref
= SUBREG_REG (ref
);
14354 /* We have a pseudo which has been spilt onto the stack; there
14355 are two cases here: the first where there is a simple
14356 stack-slot replacement and a second where the stack-slot is
14357 out of range, or is used as a subreg. */
14358 if (reg_equiv_mem (REGNO (ref
)))
14360 ref
= reg_equiv_mem (REGNO (ref
));
14361 base
= find_replacement (&XEXP (ref
, 0));
14364 /* The slot is out of range, or was dressed up in a SUBREG. */
14365 base
= reg_equiv_address (REGNO (ref
));
14367 /* PR 62554: If there is no equivalent memory location then just move
14368 the value as an SImode register move. This happens when the target
14369 architecture variant does not have an HImode register move. */
14372 gcc_assert (REG_P (operands
[0]));
14373 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14374 gen_rtx_SUBREG (SImode
, ref
, 0)));
14379 base
= find_replacement (&XEXP (ref
, 0));
14381 /* Handle the case where the address is too complex to be offset by 1. */
14382 if (GET_CODE (base
) == MINUS
14383 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14385 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14387 emit_set_insn (base_plus
, base
);
14390 else if (GET_CODE (base
) == PLUS
)
14392 /* The addend must be CONST_INT, or we would have dealt with it above. */
14393 HOST_WIDE_INT hi
, lo
;
14395 offset
+= INTVAL (XEXP (base
, 1));
14396 base
= XEXP (base
, 0);
14398 /* Rework the address into a legal sequence of insns. */
14399 /* Valid range for lo is -4095 -> 4095 */
14402 : -((-offset
) & 0xfff));
14404 /* Corner case, if lo is the max offset then we would be out of range
14405 once we have added the additional 1 below, so bump the msb into the
14406 pre-loading insn(s). */
14410 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14411 ^ (HOST_WIDE_INT
) 0x80000000)
14412 - (HOST_WIDE_INT
) 0x80000000);
14414 gcc_assert (hi
+ lo
== offset
);
14418 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14420 /* Get the base address; addsi3 knows how to handle constants
14421 that require more than one insn. */
14422 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14428 /* Operands[2] may overlap operands[0] (though it won't overlap
14429 operands[1]), that's why we asked for a DImode reg -- so we can
14430 use the bit that does not overlap. */
14431 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14432 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14434 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14436 emit_insn (gen_zero_extendqisi2 (scratch
,
14437 gen_rtx_MEM (QImode
,
14438 plus_constant (Pmode
, base
,
14440 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14441 gen_rtx_MEM (QImode
,
14442 plus_constant (Pmode
, base
,
14444 if (!BYTES_BIG_ENDIAN
)
14445 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14446 gen_rtx_IOR (SImode
,
14449 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14453 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14454 gen_rtx_IOR (SImode
,
14455 gen_rtx_ASHIFT (SImode
, scratch
,
14457 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14460 /* Handle storing a half-word to memory during reload by synthesizing as two
14461 byte stores. Take care not to clobber the input values until after we
14462 have moved them somewhere safe. This code assumes that if the DImode
14463 scratch in operands[2] overlaps either the input value or output address
14464 in some way, then that value must die in this insn (we absolutely need
14465 two scratch registers for some corner cases). */
14467 arm_reload_out_hi (rtx
*operands
)
14469 rtx ref
= operands
[0];
14470 rtx outval
= operands
[1];
14472 HOST_WIDE_INT offset
= 0;
14474 if (GET_CODE (ref
) == SUBREG
)
14476 offset
= SUBREG_BYTE (ref
);
14477 ref
= SUBREG_REG (ref
);
14482 /* We have a pseudo which has been spilt onto the stack; there
14483 are two cases here: the first where there is a simple
14484 stack-slot replacement and a second where the stack-slot is
14485 out of range, or is used as a subreg. */
14486 if (reg_equiv_mem (REGNO (ref
)))
14488 ref
= reg_equiv_mem (REGNO (ref
));
14489 base
= find_replacement (&XEXP (ref
, 0));
14492 /* The slot is out of range, or was dressed up in a SUBREG. */
14493 base
= reg_equiv_address (REGNO (ref
));
14495 /* PR 62254: If there is no equivalent memory location then just move
14496 the value as an SImode register move. This happens when the target
14497 architecture variant does not have an HImode register move. */
14500 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
14502 if (REG_P (outval
))
14504 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14505 gen_rtx_SUBREG (SImode
, outval
, 0)));
14507 else /* SUBREG_P (outval) */
14509 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
14510 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14511 SUBREG_REG (outval
)));
14513 /* FIXME: Handle other cases ? */
14514 gcc_unreachable ();
14520 base
= find_replacement (&XEXP (ref
, 0));
14522 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14524 /* Handle the case where the address is too complex to be offset by 1. */
14525 if (GET_CODE (base
) == MINUS
14526 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14528 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14530 /* Be careful not to destroy OUTVAL. */
14531 if (reg_overlap_mentioned_p (base_plus
, outval
))
14533 /* Updating base_plus might destroy outval, see if we can
14534 swap the scratch and base_plus. */
14535 if (!reg_overlap_mentioned_p (scratch
, outval
))
14536 std::swap (scratch
, base_plus
);
14539 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14541 /* Be conservative and copy OUTVAL into the scratch now,
14542 this should only be necessary if outval is a subreg
14543 of something larger than a word. */
14544 /* XXX Might this clobber base? I can't see how it can,
14545 since scratch is known to overlap with OUTVAL, and
14546 must be wider than a word. */
14547 emit_insn (gen_movhi (scratch_hi
, outval
));
14548 outval
= scratch_hi
;
14552 emit_set_insn (base_plus
, base
);
14555 else if (GET_CODE (base
) == PLUS
)
14557 /* The addend must be CONST_INT, or we would have dealt with it above. */
14558 HOST_WIDE_INT hi
, lo
;
14560 offset
+= INTVAL (XEXP (base
, 1));
14561 base
= XEXP (base
, 0);
14563 /* Rework the address into a legal sequence of insns. */
14564 /* Valid range for lo is -4095 -> 4095 */
14567 : -((-offset
) & 0xfff));
14569 /* Corner case, if lo is the max offset then we would be out of range
14570 once we have added the additional 1 below, so bump the msb into the
14571 pre-loading insn(s). */
14575 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14576 ^ (HOST_WIDE_INT
) 0x80000000)
14577 - (HOST_WIDE_INT
) 0x80000000);
14579 gcc_assert (hi
+ lo
== offset
);
14583 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14585 /* Be careful not to destroy OUTVAL. */
14586 if (reg_overlap_mentioned_p (base_plus
, outval
))
14588 /* Updating base_plus might destroy outval, see if we
14589 can swap the scratch and base_plus. */
14590 if (!reg_overlap_mentioned_p (scratch
, outval
))
14591 std::swap (scratch
, base_plus
);
14594 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14596 /* Be conservative and copy outval into scratch now,
14597 this should only be necessary if outval is a
14598 subreg of something larger than a word. */
14599 /* XXX Might this clobber base? I can't see how it
14600 can, since scratch is known to overlap with
14602 emit_insn (gen_movhi (scratch_hi
, outval
));
14603 outval
= scratch_hi
;
14607 /* Get the base address; addsi3 knows how to handle constants
14608 that require more than one insn. */
14609 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14615 if (BYTES_BIG_ENDIAN
)
14617 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
14618 plus_constant (Pmode
, base
,
14620 gen_lowpart (QImode
, outval
)));
14621 emit_insn (gen_lshrsi3 (scratch
,
14622 gen_rtx_SUBREG (SImode
, outval
, 0),
14624 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
14626 gen_lowpart (QImode
, scratch
)));
14630 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
14632 gen_lowpart (QImode
, outval
)));
14633 emit_insn (gen_lshrsi3 (scratch
,
14634 gen_rtx_SUBREG (SImode
, outval
, 0),
14636 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
14637 plus_constant (Pmode
, base
,
14639 gen_lowpart (QImode
, scratch
)));
14643 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
14644 (padded to the size of a word) should be passed in a register. */
14647 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
14649 if (TARGET_AAPCS_BASED
)
14650 return must_pass_in_stack_var_size (mode
, type
);
14652 return must_pass_in_stack_var_size_or_pad (mode
, type
);
14656 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
14657 Return true if an argument passed on the stack should be padded upwards,
14658 i.e. if the least-significant byte has useful data.
14659 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
14660 aggregate types are placed in the lowest memory address. */
14663 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
14665 if (!TARGET_AAPCS_BASED
)
14666 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
14668 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
14675 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
14676 Return !BYTES_BIG_ENDIAN if the least significant byte of the
14677 register has useful data, and return the opposite if the most
14678 significant byte does. */
14681 arm_pad_reg_upward (machine_mode mode
,
14682 tree type
, int first ATTRIBUTE_UNUSED
)
14684 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
14686 /* For AAPCS, small aggregates, small fixed-point types,
14687 and small complex types are always padded upwards. */
14690 if ((AGGREGATE_TYPE_P (type
)
14691 || TREE_CODE (type
) == COMPLEX_TYPE
14692 || FIXED_POINT_TYPE_P (type
))
14693 && int_size_in_bytes (type
) <= 4)
14698 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
14699 && GET_MODE_SIZE (mode
) <= 4)
14704 /* Otherwise, use default padding. */
14705 return !BYTES_BIG_ENDIAN
;
14708 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
14709 assuming that the address in the base register is word aligned. */
14711 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
14713 HOST_WIDE_INT max_offset
;
14715 /* Offset must be a multiple of 4 in Thumb mode. */
14716 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
14721 else if (TARGET_ARM
)
14726 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
14729 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
14730 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
14731 Assumes that the address in the base register RN is word aligned. Pattern
14732 guarantees that both memory accesses use the same base register,
14733 the offsets are constants within the range, and the gap between the offsets is 4.
14734 If preload complete then check that registers are legal. WBACK indicates whether
14735 address is updated. LOAD indicates whether memory access is load or store. */
14737 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
14738 bool wback
, bool load
)
14740 unsigned int t
, t2
, n
;
14742 if (!reload_completed
)
14745 if (!offset_ok_for_ldrd_strd (offset
))
14752 if ((TARGET_THUMB2
)
14753 && ((wback
&& (n
== t
|| n
== t2
))
14754 || (t
== SP_REGNUM
)
14755 || (t
== PC_REGNUM
)
14756 || (t2
== SP_REGNUM
)
14757 || (t2
== PC_REGNUM
)
14758 || (!load
&& (n
== PC_REGNUM
))
14759 || (load
&& (t
== t2
))
14760 /* Triggers Cortex-M3 LDRD errata. */
14761 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
14765 && ((wback
&& (n
== t
|| n
== t2
))
14766 || (t2
== PC_REGNUM
)
14767 || (t
% 2 != 0) /* First destination register is not even. */
14769 /* PC can be used as base register (for offset addressing only),
14770 but it is depricated. */
14771 || (n
== PC_REGNUM
)))
14777 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
14778 operand MEM's address contains an immediate offset from the base
14779 register and has no side effects, in which case it sets BASE and
14780 OFFSET accordingly. */
14782 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
14786 gcc_assert (base
!= NULL
&& offset
!= NULL
);
14788 /* TODO: Handle more general memory operand patterns, such as
14789 PRE_DEC and PRE_INC. */
14791 if (side_effects_p (mem
))
14794 /* Can't deal with subregs. */
14795 if (GET_CODE (mem
) == SUBREG
)
14798 gcc_assert (MEM_P (mem
));
14800 *offset
= const0_rtx
;
14802 addr
= XEXP (mem
, 0);
14804 /* If addr isn't valid for DImode, then we can't handle it. */
14805 if (!arm_legitimate_address_p (DImode
, addr
,
14806 reload_in_progress
|| reload_completed
))
14814 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
14816 *base
= XEXP (addr
, 0);
14817 *offset
= XEXP (addr
, 1);
14818 return (REG_P (*base
) && CONST_INT_P (*offset
));
14824 /* Called from a peephole2 to replace two word-size accesses with a
14825 single LDRD/STRD instruction. Returns true iff we can generate a
14826 new instruction sequence. That is, both accesses use the same base
14827 register and the gap between constant offsets is 4. This function
14828 may reorder its operands to match ldrd/strd RTL templates.
14829 OPERANDS are the operands found by the peephole matcher;
14830 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
14831 corresponding memory operands. LOAD indicaates whether the access
14832 is load or store. CONST_STORE indicates a store of constant
14833 integer values held in OPERANDS[4,5] and assumes that the pattern
14834 is of length 4 insn, for the purpose of checking dead registers.
14835 COMMUTE indicates that register operands may be reordered. */
14837 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
14838 bool const_store
, bool commute
)
14841 HOST_WIDE_INT offsets
[2], offset
;
14842 rtx base
= NULL_RTX
;
14843 rtx cur_base
, cur_offset
, tmp
;
14845 HARD_REG_SET regset
;
14847 gcc_assert (!const_store
|| !load
);
14848 /* Check that the memory references are immediate offsets from the
14849 same base register. Extract the base register, the destination
14850 registers, and the corresponding memory offsets. */
14851 for (i
= 0; i
< nops
; i
++)
14853 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
14858 else if (REGNO (base
) != REGNO (cur_base
))
14861 offsets
[i
] = INTVAL (cur_offset
);
14862 if (GET_CODE (operands
[i
]) == SUBREG
)
14864 tmp
= SUBREG_REG (operands
[i
]);
14865 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
14870 /* Make sure there is no dependency between the individual loads. */
14871 if (load
&& REGNO (operands
[0]) == REGNO (base
))
14872 return false; /* RAW */
14874 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
14875 return false; /* WAW */
14877 /* If the same input register is used in both stores
14878 when storing different constants, try to find a free register.
14879 For example, the code
14884 can be transformed into
14888 in Thumb mode assuming that r1 is free.
14889 For ARM mode do the same but only if the starting register
14890 can be made to be even. */
14892 && REGNO (operands
[0]) == REGNO (operands
[1])
14893 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
14897 CLEAR_HARD_REG_SET (regset
);
14898 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
14899 if (tmp
== NULL_RTX
)
14902 /* Use the new register in the first load to ensure that
14903 if the original input register is not dead after peephole,
14904 then it will have the correct constant value. */
14907 else if (TARGET_ARM
)
14909 int regno
= REGNO (operands
[0]);
14910 if (!peep2_reg_dead_p (4, operands
[0]))
14912 /* When the input register is even and is not dead after the
14913 pattern, it has to hold the second constant but we cannot
14914 form a legal STRD in ARM mode with this register as the second
14916 if (regno
% 2 == 0)
14919 /* Is regno-1 free? */
14920 SET_HARD_REG_SET (regset
);
14921 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
14922 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
14923 if (tmp
== NULL_RTX
)
14930 /* Find a DImode register. */
14931 CLEAR_HARD_REG_SET (regset
);
14932 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
14933 if (tmp
!= NULL_RTX
)
14935 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
14936 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
14940 /* Can we use the input register to form a DI register? */
14941 SET_HARD_REG_SET (regset
);
14942 CLEAR_HARD_REG_BIT(regset
,
14943 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
14944 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
14945 if (tmp
== NULL_RTX
)
14947 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
14951 gcc_assert (operands
[0] != NULL_RTX
);
14952 gcc_assert (operands
[1] != NULL_RTX
);
14953 gcc_assert (REGNO (operands
[0]) % 2 == 0);
14954 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
14958 /* Make sure the instructions are ordered with lower memory access first. */
14959 if (offsets
[0] > offsets
[1])
14961 gap
= offsets
[0] - offsets
[1];
14962 offset
= offsets
[1];
14964 /* Swap the instructions such that lower memory is accessed first. */
14965 std::swap (operands
[0], operands
[1]);
14966 std::swap (operands
[2], operands
[3]);
14968 std::swap (operands
[4], operands
[5]);
14972 gap
= offsets
[1] - offsets
[0];
14973 offset
= offsets
[0];
14976 /* Make sure accesses are to consecutive memory locations. */
14980 /* Make sure we generate legal instructions. */
14981 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
14985 /* In Thumb state, where registers are almost unconstrained, there
14986 is little hope to fix it. */
14990 if (load
&& commute
)
14992 /* Try reordering registers. */
14993 std::swap (operands
[0], operands
[1]);
14994 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15001 /* If input registers are dead after this pattern, they can be
15002 reordered or replaced by other registers that are free in the
15003 current pattern. */
15004 if (!peep2_reg_dead_p (4, operands
[0])
15005 || !peep2_reg_dead_p (4, operands
[1]))
15008 /* Try to reorder the input registers. */
15009 /* For example, the code
15014 can be transformed into
15019 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15022 std::swap (operands
[0], operands
[1]);
15026 /* Try to find a free DI register. */
15027 CLEAR_HARD_REG_SET (regset
);
15028 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15029 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15032 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15033 if (tmp
== NULL_RTX
)
15036 /* DREG must be an even-numbered register in DImode.
15037 Split it into SI registers. */
15038 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15039 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15040 gcc_assert (operands
[0] != NULL_RTX
);
15041 gcc_assert (operands
[1] != NULL_RTX
);
15042 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15043 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15045 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15057 /* Print a symbolic form of X to the debug file, F. */
15059 arm_print_value (FILE *f
, rtx x
)
15061 switch (GET_CODE (x
))
15064 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15068 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15076 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15078 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15079 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15087 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15091 fprintf (f
, "`%s'", XSTR (x
, 0));
15095 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15099 arm_print_value (f
, XEXP (x
, 0));
15103 arm_print_value (f
, XEXP (x
, 0));
15105 arm_print_value (f
, XEXP (x
, 1));
15113 fprintf (f
, "????");
15118 /* Routines for manipulation of the constant pool. */
15120 /* Arm instructions cannot load a large constant directly into a
15121 register; they have to come from a pc relative load. The constant
15122 must therefore be placed in the addressable range of the pc
15123 relative load. Depending on the precise pc relative load
15124 instruction the range is somewhere between 256 bytes and 4k. This
15125 means that we often have to dump a constant inside a function, and
15126 generate code to branch around it.
15128 It is important to minimize this, since the branches will slow
15129 things down and make the code larger.
15131 Normally we can hide the table after an existing unconditional
15132 branch so that there is no interruption of the flow, but in the
15133 worst case the code looks like this:
15151 We fix this by performing a scan after scheduling, which notices
15152 which instructions need to have their operands fetched from the
15153 constant table and builds the table.
15155 The algorithm starts by building a table of all the constants that
15156 need fixing up and all the natural barriers in the function (places
15157 where a constant table can be dropped without breaking the flow).
15158 For each fixup we note how far the pc-relative replacement will be
15159 able to reach and the offset of the instruction into the function.
15161 Having built the table we then group the fixes together to form
15162 tables that are as large as possible (subject to addressing
15163 constraints) and emit each table of constants after the last
15164 barrier that is within range of all the instructions in the group.
15165 If a group does not contain a barrier, then we forcibly create one
15166 by inserting a jump instruction into the flow. Once the table has
15167 been inserted, the insns are then modified to reference the
15168 relevant entry in the pool.
15170 Possible enhancements to the algorithm (not implemented) are:
15172 1) For some processors and object formats, there may be benefit in
15173 aligning the pools to the start of cache lines; this alignment
15174 would need to be taken into account when calculating addressability
15177 /* These typedefs are located at the start of this file, so that
15178 they can be used in the prototypes there. This comment is to
15179 remind readers of that fact so that the following structures
15180 can be understood more easily.
15182 typedef struct minipool_node Mnode;
15183 typedef struct minipool_fixup Mfix; */
15185 struct minipool_node
15187 /* Doubly linked chain of entries. */
15190 /* The maximum offset into the code that this entry can be placed. While
15191 pushing fixes for forward references, all entries are sorted in order
15192 of increasing max_address. */
15193 HOST_WIDE_INT max_address
;
15194 /* Similarly for an entry inserted for a backwards ref. */
15195 HOST_WIDE_INT min_address
;
15196 /* The number of fixes referencing this entry. This can become zero
15197 if we "unpush" an entry. In this case we ignore the entry when we
15198 come to emit the code. */
15200 /* The offset from the start of the minipool. */
15201 HOST_WIDE_INT offset
;
15202 /* The value in table. */
15204 /* The mode of value. */
15206 /* The size of the value. With iWMMXt enabled
15207 sizes > 4 also imply an alignment of 8-bytes. */
15211 struct minipool_fixup
15215 HOST_WIDE_INT address
;
15221 HOST_WIDE_INT forwards
;
15222 HOST_WIDE_INT backwards
;
15225 /* Fixes less than a word need padding out to a word boundary. */
15226 #define MINIPOOL_FIX_SIZE(mode) \
15227 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15229 static Mnode
* minipool_vector_head
;
15230 static Mnode
* minipool_vector_tail
;
15231 static rtx_code_label
*minipool_vector_label
;
15232 static int minipool_pad
;
15234 /* The linked list of all minipool fixes required for this function. */
15235 Mfix
* minipool_fix_head
;
15236 Mfix
* minipool_fix_tail
;
15237 /* The fix entry for the current minipool, once it has been placed. */
15238 Mfix
* minipool_barrier
;
15240 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15241 #define JUMP_TABLES_IN_TEXT_SECTION 0
15244 static HOST_WIDE_INT
15245 get_jump_table_size (rtx_jump_table_data
*insn
)
15247 /* ADDR_VECs only take room if read-only data does into the text
15249 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15251 rtx body
= PATTERN (insn
);
15252 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15253 HOST_WIDE_INT size
;
15254 HOST_WIDE_INT modesize
;
15256 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15257 size
= modesize
* XVECLEN (body
, elt
);
15261 /* Round up size of TBB table to a halfword boundary. */
15262 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
15265 /* No padding necessary for TBH. */
15268 /* Add two bytes for alignment on Thumb. */
15273 gcc_unreachable ();
15281 /* Return the maximum amount of padding that will be inserted before
15284 static HOST_WIDE_INT
15285 get_label_padding (rtx label
)
15287 HOST_WIDE_INT align
, min_insn_size
;
15289 align
= 1 << label_to_alignment (label
);
15290 min_insn_size
= TARGET_THUMB
? 2 : 4;
15291 return align
> min_insn_size
? align
- min_insn_size
: 0;
15294 /* Move a minipool fix MP from its current location to before MAX_MP.
15295 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15296 constraints may need updating. */
15298 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15299 HOST_WIDE_INT max_address
)
15301 /* The code below assumes these are different. */
15302 gcc_assert (mp
!= max_mp
);
15304 if (max_mp
== NULL
)
15306 if (max_address
< mp
->max_address
)
15307 mp
->max_address
= max_address
;
15311 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15312 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15314 mp
->max_address
= max_address
;
15316 /* Unlink MP from its current position. Since max_mp is non-null,
15317 mp->prev must be non-null. */
15318 mp
->prev
->next
= mp
->next
;
15319 if (mp
->next
!= NULL
)
15320 mp
->next
->prev
= mp
->prev
;
15322 minipool_vector_tail
= mp
->prev
;
15324 /* Re-insert it before MAX_MP. */
15326 mp
->prev
= max_mp
->prev
;
15329 if (mp
->prev
!= NULL
)
15330 mp
->prev
->next
= mp
;
15332 minipool_vector_head
= mp
;
15335 /* Save the new entry. */
15338 /* Scan over the preceding entries and adjust their addresses as
15340 while (mp
->prev
!= NULL
15341 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15343 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15350 /* Add a constant to the minipool for a forward reference. Returns the
15351 node added or NULL if the constant will not fit in this pool. */
15353 add_minipool_forward_ref (Mfix
*fix
)
15355 /* If set, max_mp is the first pool_entry that has a lower
15356 constraint than the one we are trying to add. */
15357 Mnode
* max_mp
= NULL
;
15358 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15361 /* If the minipool starts before the end of FIX->INSN then this FIX
15362 can not be placed into the current pool. Furthermore, adding the
15363 new constant pool entry may cause the pool to start FIX_SIZE bytes
15365 if (minipool_vector_head
&&
15366 (fix
->address
+ get_attr_length (fix
->insn
)
15367 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15370 /* Scan the pool to see if a constant with the same value has
15371 already been added. While we are doing this, also note the
15372 location where we must insert the constant if it doesn't already
15374 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15376 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15377 && fix
->mode
== mp
->mode
15378 && (!LABEL_P (fix
->value
)
15379 || (CODE_LABEL_NUMBER (fix
->value
)
15380 == CODE_LABEL_NUMBER (mp
->value
)))
15381 && rtx_equal_p (fix
->value
, mp
->value
))
15383 /* More than one fix references this entry. */
15385 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15388 /* Note the insertion point if necessary. */
15390 && mp
->max_address
> max_address
)
15393 /* If we are inserting an 8-bytes aligned quantity and
15394 we have not already found an insertion point, then
15395 make sure that all such 8-byte aligned quantities are
15396 placed at the start of the pool. */
15397 if (ARM_DOUBLEWORD_ALIGN
15399 && fix
->fix_size
>= 8
15400 && mp
->fix_size
< 8)
15403 max_address
= mp
->max_address
;
15407 /* The value is not currently in the minipool, so we need to create
15408 a new entry for it. If MAX_MP is NULL, the entry will be put on
15409 the end of the list since the placement is less constrained than
15410 any existing entry. Otherwise, we insert the new fix before
15411 MAX_MP and, if necessary, adjust the constraints on the other
15414 mp
->fix_size
= fix
->fix_size
;
15415 mp
->mode
= fix
->mode
;
15416 mp
->value
= fix
->value
;
15418 /* Not yet required for a backwards ref. */
15419 mp
->min_address
= -65536;
15421 if (max_mp
== NULL
)
15423 mp
->max_address
= max_address
;
15425 mp
->prev
= minipool_vector_tail
;
15427 if (mp
->prev
== NULL
)
15429 minipool_vector_head
= mp
;
15430 minipool_vector_label
= gen_label_rtx ();
15433 mp
->prev
->next
= mp
;
15435 minipool_vector_tail
= mp
;
15439 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15440 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15442 mp
->max_address
= max_address
;
15445 mp
->prev
= max_mp
->prev
;
15447 if (mp
->prev
!= NULL
)
15448 mp
->prev
->next
= mp
;
15450 minipool_vector_head
= mp
;
15453 /* Save the new entry. */
15456 /* Scan over the preceding entries and adjust their addresses as
15458 while (mp
->prev
!= NULL
15459 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15461 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15469 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15470 HOST_WIDE_INT min_address
)
15472 HOST_WIDE_INT offset
;
15474 /* The code below assumes these are different. */
15475 gcc_assert (mp
!= min_mp
);
15477 if (min_mp
== NULL
)
15479 if (min_address
> mp
->min_address
)
15480 mp
->min_address
= min_address
;
15484 /* We will adjust this below if it is too loose. */
15485 mp
->min_address
= min_address
;
15487 /* Unlink MP from its current position. Since min_mp is non-null,
15488 mp->next must be non-null. */
15489 mp
->next
->prev
= mp
->prev
;
15490 if (mp
->prev
!= NULL
)
15491 mp
->prev
->next
= mp
->next
;
15493 minipool_vector_head
= mp
->next
;
15495 /* Reinsert it after MIN_MP. */
15497 mp
->next
= min_mp
->next
;
15499 if (mp
->next
!= NULL
)
15500 mp
->next
->prev
= mp
;
15502 minipool_vector_tail
= mp
;
15508 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15510 mp
->offset
= offset
;
15511 if (mp
->refcount
> 0)
15512 offset
+= mp
->fix_size
;
15514 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
15515 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
15521 /* Add a constant to the minipool for a backward reference. Returns the
15522 node added or NULL if the constant will not fit in this pool.
15524 Note that the code for insertion for a backwards reference can be
15525 somewhat confusing because the calculated offsets for each fix do
15526 not take into account the size of the pool (which is still under
15529 add_minipool_backward_ref (Mfix
*fix
)
15531 /* If set, min_mp is the last pool_entry that has a lower constraint
15532 than the one we are trying to add. */
15533 Mnode
*min_mp
= NULL
;
15534 /* This can be negative, since it is only a constraint. */
15535 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
15538 /* If we can't reach the current pool from this insn, or if we can't
15539 insert this entry at the end of the pool without pushing other
15540 fixes out of range, then we don't try. This ensures that we
15541 can't fail later on. */
15542 if (min_address
>= minipool_barrier
->address
15543 || (minipool_vector_tail
->min_address
+ fix
->fix_size
15544 >= minipool_barrier
->address
))
15547 /* Scan the pool to see if a constant with the same value has
15548 already been added. While we are doing this, also note the
15549 location where we must insert the constant if it doesn't already
15551 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
15553 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15554 && fix
->mode
== mp
->mode
15555 && (!LABEL_P (fix
->value
)
15556 || (CODE_LABEL_NUMBER (fix
->value
)
15557 == CODE_LABEL_NUMBER (mp
->value
)))
15558 && rtx_equal_p (fix
->value
, mp
->value
)
15559 /* Check that there is enough slack to move this entry to the
15560 end of the table (this is conservative). */
15561 && (mp
->max_address
15562 > (minipool_barrier
->address
15563 + minipool_vector_tail
->offset
15564 + minipool_vector_tail
->fix_size
)))
15567 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
15570 if (min_mp
!= NULL
)
15571 mp
->min_address
+= fix
->fix_size
;
15574 /* Note the insertion point if necessary. */
15575 if (mp
->min_address
< min_address
)
15577 /* For now, we do not allow the insertion of 8-byte alignment
15578 requiring nodes anywhere but at the start of the pool. */
15579 if (ARM_DOUBLEWORD_ALIGN
15580 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
15585 else if (mp
->max_address
15586 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
15588 /* Inserting before this entry would push the fix beyond
15589 its maximum address (which can happen if we have
15590 re-located a forwards fix); force the new fix to come
15592 if (ARM_DOUBLEWORD_ALIGN
15593 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
15598 min_address
= mp
->min_address
+ fix
->fix_size
;
15601 /* Do not insert a non-8-byte aligned quantity before 8-byte
15602 aligned quantities. */
15603 else if (ARM_DOUBLEWORD_ALIGN
15604 && fix
->fix_size
< 8
15605 && mp
->fix_size
>= 8)
15608 min_address
= mp
->min_address
+ fix
->fix_size
;
15613 /* We need to create a new entry. */
15615 mp
->fix_size
= fix
->fix_size
;
15616 mp
->mode
= fix
->mode
;
15617 mp
->value
= fix
->value
;
15619 mp
->max_address
= minipool_barrier
->address
+ 65536;
15621 mp
->min_address
= min_address
;
15623 if (min_mp
== NULL
)
15626 mp
->next
= minipool_vector_head
;
15628 if (mp
->next
== NULL
)
15630 minipool_vector_tail
= mp
;
15631 minipool_vector_label
= gen_label_rtx ();
15634 mp
->next
->prev
= mp
;
15636 minipool_vector_head
= mp
;
15640 mp
->next
= min_mp
->next
;
15644 if (mp
->next
!= NULL
)
15645 mp
->next
->prev
= mp
;
15647 minipool_vector_tail
= mp
;
15650 /* Save the new entry. */
15658 /* Scan over the following entries and adjust their offsets. */
15659 while (mp
->next
!= NULL
)
15661 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
15662 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
15665 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
15667 mp
->next
->offset
= mp
->offset
;
15676 assign_minipool_offsets (Mfix
*barrier
)
15678 HOST_WIDE_INT offset
= 0;
15681 minipool_barrier
= barrier
;
15683 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15685 mp
->offset
= offset
;
15687 if (mp
->refcount
> 0)
15688 offset
+= mp
->fix_size
;
15692 /* Output the literal table */
15694 dump_minipool (rtx_insn
*scan
)
15700 if (ARM_DOUBLEWORD_ALIGN
)
15701 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15702 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
15709 fprintf (dump_file
,
15710 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
15711 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
15713 scan
= emit_label_after (gen_label_rtx (), scan
);
15714 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
15715 scan
= emit_label_after (minipool_vector_label
, scan
);
15717 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
15719 if (mp
->refcount
> 0)
15723 fprintf (dump_file
,
15724 ";; Offset %u, min %ld, max %ld ",
15725 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
15726 (unsigned long) mp
->max_address
);
15727 arm_print_value (dump_file
, mp
->value
);
15728 fputc ('\n', dump_file
);
15731 switch (GET_MODE_SIZE (mp
->mode
))
15733 #ifdef HAVE_consttable_1
15735 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
15739 #ifdef HAVE_consttable_2
15741 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
15745 #ifdef HAVE_consttable_4
15747 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
15751 #ifdef HAVE_consttable_8
15753 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
15757 #ifdef HAVE_consttable_16
15759 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
15764 gcc_unreachable ();
15772 minipool_vector_head
= minipool_vector_tail
= NULL
;
15773 scan
= emit_insn_after (gen_consttable_end (), scan
);
15774 scan
= emit_barrier_after (scan
);
15777 /* Return the cost of forcibly inserting a barrier after INSN. */
15779 arm_barrier_cost (rtx_insn
*insn
)
15781 /* Basing the location of the pool on the loop depth is preferable,
15782 but at the moment, the basic block information seems to be
15783 corrupt by this stage of the compilation. */
15784 int base_cost
= 50;
15785 rtx_insn
*next
= next_nonnote_insn (insn
);
15787 if (next
!= NULL
&& LABEL_P (next
))
15790 switch (GET_CODE (insn
))
15793 /* It will always be better to place the table before the label, rather
15802 return base_cost
- 10;
15805 return base_cost
+ 10;
15809 /* Find the best place in the insn stream in the range
15810 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
15811 Create the barrier by inserting a jump and add a new fix entry for
15814 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
15816 HOST_WIDE_INT count
= 0;
15817 rtx_barrier
*barrier
;
15818 rtx_insn
*from
= fix
->insn
;
15819 /* The instruction after which we will insert the jump. */
15820 rtx_insn
*selected
= NULL
;
15822 /* The address at which the jump instruction will be placed. */
15823 HOST_WIDE_INT selected_address
;
15825 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
15826 rtx_code_label
*label
= gen_label_rtx ();
15828 selected_cost
= arm_barrier_cost (from
);
15829 selected_address
= fix
->address
;
15831 while (from
&& count
< max_count
)
15833 rtx_jump_table_data
*tmp
;
15836 /* This code shouldn't have been called if there was a natural barrier
15838 gcc_assert (!BARRIER_P (from
));
15840 /* Count the length of this insn. This must stay in sync with the
15841 code that pushes minipool fixes. */
15842 if (LABEL_P (from
))
15843 count
+= get_label_padding (from
);
15845 count
+= get_attr_length (from
);
15847 /* If there is a jump table, add its length. */
15848 if (tablejump_p (from
, NULL
, &tmp
))
15850 count
+= get_jump_table_size (tmp
);
15852 /* Jump tables aren't in a basic block, so base the cost on
15853 the dispatch insn. If we select this location, we will
15854 still put the pool after the table. */
15855 new_cost
= arm_barrier_cost (from
);
15857 if (count
< max_count
15858 && (!selected
|| new_cost
<= selected_cost
))
15861 selected_cost
= new_cost
;
15862 selected_address
= fix
->address
+ count
;
15865 /* Continue after the dispatch table. */
15866 from
= NEXT_INSN (tmp
);
15870 new_cost
= arm_barrier_cost (from
);
15872 if (count
< max_count
15873 && (!selected
|| new_cost
<= selected_cost
))
15876 selected_cost
= new_cost
;
15877 selected_address
= fix
->address
+ count
;
15880 from
= NEXT_INSN (from
);
15883 /* Make sure that we found a place to insert the jump. */
15884 gcc_assert (selected
);
15886 /* Make sure we do not split a call and its corresponding
15887 CALL_ARG_LOCATION note. */
15888 if (CALL_P (selected
))
15890 rtx_insn
*next
= NEXT_INSN (selected
);
15891 if (next
&& NOTE_P (next
)
15892 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
15896 /* Create a new JUMP_INSN that branches around a barrier. */
15897 from
= emit_jump_insn_after (gen_jump (label
), selected
);
15898 JUMP_LABEL (from
) = label
;
15899 barrier
= emit_barrier_after (from
);
15900 emit_label_after (label
, barrier
);
15902 /* Create a minipool barrier entry for the new barrier. */
15903 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
15904 new_fix
->insn
= barrier
;
15905 new_fix
->address
= selected_address
;
15906 new_fix
->next
= fix
->next
;
15907 fix
->next
= new_fix
;
15912 /* Record that there is a natural barrier in the insn stream at
15915 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
15917 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
15920 fix
->address
= address
;
15923 if (minipool_fix_head
!= NULL
)
15924 minipool_fix_tail
->next
= fix
;
15926 minipool_fix_head
= fix
;
15928 minipool_fix_tail
= fix
;
15931 /* Record INSN, which will need fixing up to load a value from the
15932 minipool. ADDRESS is the offset of the insn since the start of the
15933 function; LOC is a pointer to the part of the insn which requires
15934 fixing; VALUE is the constant that must be loaded, which is of type
15937 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
15938 machine_mode mode
, rtx value
)
15940 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
15943 fix
->address
= address
;
15946 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
15947 fix
->value
= value
;
15948 fix
->forwards
= get_attr_pool_range (insn
);
15949 fix
->backwards
= get_attr_neg_pool_range (insn
);
15950 fix
->minipool
= NULL
;
15952 /* If an insn doesn't have a range defined for it, then it isn't
15953 expecting to be reworked by this code. Better to stop now than
15954 to generate duff assembly code. */
15955 gcc_assert (fix
->forwards
|| fix
->backwards
);
15957 /* If an entry requires 8-byte alignment then assume all constant pools
15958 require 4 bytes of padding. Trying to do this later on a per-pool
15959 basis is awkward because existing pool entries have to be modified. */
15960 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
15965 fprintf (dump_file
,
15966 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
15967 GET_MODE_NAME (mode
),
15968 INSN_UID (insn
), (unsigned long) address
,
15969 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
15970 arm_print_value (dump_file
, fix
->value
);
15971 fprintf (dump_file
, "\n");
15974 /* Add it to the chain of fixes. */
15977 if (minipool_fix_head
!= NULL
)
15978 minipool_fix_tail
->next
= fix
;
15980 minipool_fix_head
= fix
;
15982 minipool_fix_tail
= fix
;
15985 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
15986 Returns the number of insns needed, or 99 if we always want to synthesize
15989 arm_max_const_double_inline_cost ()
15991 /* Let the value get synthesized to avoid the use of literal pools. */
15992 if (arm_disable_literal_pool
)
15995 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
15998 /* Return the cost of synthesizing a 64-bit constant VAL inline.
15999 Returns the number of insns needed, or 99 if we don't know how to
16002 arm_const_double_inline_cost (rtx val
)
16004 rtx lowpart
, highpart
;
16007 mode
= GET_MODE (val
);
16009 if (mode
== VOIDmode
)
16012 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16014 lowpart
= gen_lowpart (SImode
, val
);
16015 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16017 gcc_assert (CONST_INT_P (lowpart
));
16018 gcc_assert (CONST_INT_P (highpart
));
16020 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16021 NULL_RTX
, NULL_RTX
, 0, 0)
16022 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16023 NULL_RTX
, NULL_RTX
, 0, 0));
16026 /* Cost of loading a SImode constant. */
16028 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16030 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16031 NULL_RTX
, NULL_RTX
, 1, 0);
16034 /* Return true if it is worthwhile to split a 64-bit constant into two
16035 32-bit operations. This is the case if optimizing for size, or
16036 if we have load delay slots, or if one 32-bit part can be done with
16037 a single data operation. */
16039 arm_const_double_by_parts (rtx val
)
16041 machine_mode mode
= GET_MODE (val
);
16044 if (optimize_size
|| arm_ld_sched
)
16047 if (mode
== VOIDmode
)
16050 part
= gen_highpart_mode (SImode
, mode
, val
);
16052 gcc_assert (CONST_INT_P (part
));
16054 if (const_ok_for_arm (INTVAL (part
))
16055 || const_ok_for_arm (~INTVAL (part
)))
16058 part
= gen_lowpart (SImode
, val
);
16060 gcc_assert (CONST_INT_P (part
));
16062 if (const_ok_for_arm (INTVAL (part
))
16063 || const_ok_for_arm (~INTVAL (part
)))
16069 /* Return true if it is possible to inline both the high and low parts
16070 of a 64-bit constant into 32-bit data processing instructions. */
16072 arm_const_double_by_immediates (rtx val
)
16074 machine_mode mode
= GET_MODE (val
);
16077 if (mode
== VOIDmode
)
16080 part
= gen_highpart_mode (SImode
, mode
, val
);
16082 gcc_assert (CONST_INT_P (part
));
16084 if (!const_ok_for_arm (INTVAL (part
)))
16087 part
= gen_lowpart (SImode
, val
);
16089 gcc_assert (CONST_INT_P (part
));
16091 if (!const_ok_for_arm (INTVAL (part
)))
16097 /* Scan INSN and note any of its operands that need fixing.
16098 If DO_PUSHES is false we do not actually push any of the fixups
16101 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16105 extract_constrain_insn (insn
);
16107 if (recog_data
.n_alternatives
== 0)
16110 /* Fill in recog_op_alt with information about the constraints of
16112 preprocess_constraints (insn
);
16114 const operand_alternative
*op_alt
= which_op_alt ();
16115 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16117 /* Things we need to fix can only occur in inputs. */
16118 if (recog_data
.operand_type
[opno
] != OP_IN
)
16121 /* If this alternative is a memory reference, then any mention
16122 of constants in this alternative is really to fool reload
16123 into allowing us to accept one there. We need to fix them up
16124 now so that we output the right code. */
16125 if (op_alt
[opno
].memory_ok
)
16127 rtx op
= recog_data
.operand
[opno
];
16129 if (CONSTANT_P (op
))
16132 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16133 recog_data
.operand_mode
[opno
], op
);
16135 else if (MEM_P (op
)
16136 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16137 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16141 rtx cop
= avoid_constant_pool_reference (op
);
16143 /* Casting the address of something to a mode narrower
16144 than a word can cause avoid_constant_pool_reference()
16145 to return the pool reference itself. That's no good to
16146 us here. Lets just hope that we can use the
16147 constant pool value directly. */
16149 cop
= get_pool_constant (XEXP (op
, 0));
16151 push_minipool_fix (insn
, address
,
16152 recog_data
.operand_loc
[opno
],
16153 recog_data
.operand_mode
[opno
], cop
);
16163 /* Rewrite move insn into subtract of 0 if the condition codes will
16164 be useful in next conditional jump insn. */
16167 thumb1_reorg (void)
16171 FOR_EACH_BB_FN (bb
, cfun
)
16174 rtx cmp
, op0
, op1
, set
= NULL
;
16175 rtx_insn
*prev
, *insn
= BB_END (bb
);
16176 bool insn_clobbered
= false;
16178 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
16179 insn
= PREV_INSN (insn
);
16181 /* Find the last cbranchsi4_insn in basic block BB. */
16182 if (insn
== BB_HEAD (bb
)
16183 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
16186 /* Get the register with which we are comparing. */
16187 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
16188 op0
= XEXP (cmp
, 0);
16189 op1
= XEXP (cmp
, 1);
16191 /* Check that comparison is against ZERO. */
16192 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
16195 /* Find the first flag setting insn before INSN in basic block BB. */
16196 gcc_assert (insn
!= BB_HEAD (bb
));
16197 for (prev
= PREV_INSN (insn
);
16199 && prev
!= BB_HEAD (bb
)
16201 || DEBUG_INSN_P (prev
)
16202 || ((set
= single_set (prev
)) != NULL
16203 && get_attr_conds (prev
) == CONDS_NOCOND
)));
16204 prev
= PREV_INSN (prev
))
16206 if (reg_set_p (op0
, prev
))
16207 insn_clobbered
= true;
16210 /* Skip if op0 is clobbered by insn other than prev. */
16211 if (insn_clobbered
)
16217 dest
= SET_DEST (set
);
16218 src
= SET_SRC (set
);
16219 if (!low_register_operand (dest
, SImode
)
16220 || !low_register_operand (src
, SImode
))
16223 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16224 in INSN. Both src and dest of the move insn are checked. */
16225 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
16227 dest
= copy_rtx (dest
);
16228 src
= copy_rtx (src
);
16229 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
16230 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
16231 INSN_CODE (prev
) = -1;
16232 /* Set test register in INSN to dest. */
16233 XEXP (cmp
, 0) = copy_rtx (dest
);
16234 INSN_CODE (insn
) = -1;
16239 /* Convert instructions to their cc-clobbering variant if possible, since
16240 that allows us to use smaller encodings. */
16243 thumb2_reorg (void)
16248 INIT_REG_SET (&live
);
16250 /* We are freeing block_for_insn in the toplev to keep compatibility
16251 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16252 compute_bb_for_insn ();
16255 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
16257 FOR_EACH_BB_FN (bb
, cfun
)
16259 if ((current_tune
->disparage_flag_setting_t16_encodings
16260 == tune_params::DISPARAGE_FLAGS_ALL
)
16261 && optimize_bb_for_speed_p (bb
))
16265 Convert_Action action
= SKIP
;
16266 Convert_Action action_for_partial_flag_setting
16267 = ((current_tune
->disparage_flag_setting_t16_encodings
16268 != tune_params::DISPARAGE_FLAGS_NEITHER
)
16269 && optimize_bb_for_speed_p (bb
))
16272 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
16273 df_simulate_initialize_backwards (bb
, &live
);
16274 FOR_BB_INSNS_REVERSE (bb
, insn
)
16276 if (NONJUMP_INSN_P (insn
)
16277 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
16278 && GET_CODE (PATTERN (insn
)) == SET
)
16281 rtx pat
= PATTERN (insn
);
16282 rtx dst
= XEXP (pat
, 0);
16283 rtx src
= XEXP (pat
, 1);
16284 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
16286 if (UNARY_P (src
) || BINARY_P (src
))
16287 op0
= XEXP (src
, 0);
16289 if (BINARY_P (src
))
16290 op1
= XEXP (src
, 1);
16292 if (low_register_operand (dst
, SImode
))
16294 switch (GET_CODE (src
))
16297 /* Adding two registers and storing the result
16298 in the first source is already a 16-bit
16300 if (rtx_equal_p (dst
, op0
)
16301 && register_operand (op1
, SImode
))
16304 if (low_register_operand (op0
, SImode
))
16306 /* ADDS <Rd>,<Rn>,<Rm> */
16307 if (low_register_operand (op1
, SImode
))
16309 /* ADDS <Rdn>,#<imm8> */
16310 /* SUBS <Rdn>,#<imm8> */
16311 else if (rtx_equal_p (dst
, op0
)
16312 && CONST_INT_P (op1
)
16313 && IN_RANGE (INTVAL (op1
), -255, 255))
16315 /* ADDS <Rd>,<Rn>,#<imm3> */
16316 /* SUBS <Rd>,<Rn>,#<imm3> */
16317 else if (CONST_INT_P (op1
)
16318 && IN_RANGE (INTVAL (op1
), -7, 7))
16321 /* ADCS <Rd>, <Rn> */
16322 else if (GET_CODE (XEXP (src
, 0)) == PLUS
16323 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
16324 && low_register_operand (XEXP (XEXP (src
, 0), 1),
16326 && COMPARISON_P (op1
)
16327 && cc_register (XEXP (op1
, 0), VOIDmode
)
16328 && maybe_get_arm_condition_code (op1
) == ARM_CS
16329 && XEXP (op1
, 1) == const0_rtx
)
16334 /* RSBS <Rd>,<Rn>,#0
16335 Not handled here: see NEG below. */
16336 /* SUBS <Rd>,<Rn>,#<imm3>
16338 Not handled here: see PLUS above. */
16339 /* SUBS <Rd>,<Rn>,<Rm> */
16340 if (low_register_operand (op0
, SImode
)
16341 && low_register_operand (op1
, SImode
))
16346 /* MULS <Rdm>,<Rn>,<Rdm>
16347 As an exception to the rule, this is only used
16348 when optimizing for size since MULS is slow on all
16349 known implementations. We do not even want to use
16350 MULS in cold code, if optimizing for speed, so we
16351 test the global flag here. */
16352 if (!optimize_size
)
16354 /* Fall through. */
16358 /* ANDS <Rdn>,<Rm> */
16359 if (rtx_equal_p (dst
, op0
)
16360 && low_register_operand (op1
, SImode
))
16361 action
= action_for_partial_flag_setting
;
16362 else if (rtx_equal_p (dst
, op1
)
16363 && low_register_operand (op0
, SImode
))
16364 action
= action_for_partial_flag_setting
== SKIP
16365 ? SKIP
: SWAP_CONV
;
16371 /* ASRS <Rdn>,<Rm> */
16372 /* LSRS <Rdn>,<Rm> */
16373 /* LSLS <Rdn>,<Rm> */
16374 if (rtx_equal_p (dst
, op0
)
16375 && low_register_operand (op1
, SImode
))
16376 action
= action_for_partial_flag_setting
;
16377 /* ASRS <Rd>,<Rm>,#<imm5> */
16378 /* LSRS <Rd>,<Rm>,#<imm5> */
16379 /* LSLS <Rd>,<Rm>,#<imm5> */
16380 else if (low_register_operand (op0
, SImode
)
16381 && CONST_INT_P (op1
)
16382 && IN_RANGE (INTVAL (op1
), 0, 31))
16383 action
= action_for_partial_flag_setting
;
16387 /* RORS <Rdn>,<Rm> */
16388 if (rtx_equal_p (dst
, op0
)
16389 && low_register_operand (op1
, SImode
))
16390 action
= action_for_partial_flag_setting
;
16394 /* MVNS <Rd>,<Rm> */
16395 if (low_register_operand (op0
, SImode
))
16396 action
= action_for_partial_flag_setting
;
16400 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16401 if (low_register_operand (op0
, SImode
))
16406 /* MOVS <Rd>,#<imm8> */
16407 if (CONST_INT_P (src
)
16408 && IN_RANGE (INTVAL (src
), 0, 255))
16409 action
= action_for_partial_flag_setting
;
16413 /* MOVS and MOV<c> with registers have different
16414 encodings, so are not relevant here. */
16422 if (action
!= SKIP
)
16424 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
16425 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
16428 if (action
== SWAP_CONV
)
16430 src
= copy_rtx (src
);
16431 XEXP (src
, 0) = op1
;
16432 XEXP (src
, 1) = op0
;
16433 pat
= gen_rtx_SET (dst
, src
);
16434 vec
= gen_rtvec (2, pat
, clobber
);
16436 else /* action == CONV */
16437 vec
= gen_rtvec (2, pat
, clobber
);
16439 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
16440 INSN_CODE (insn
) = -1;
16444 if (NONDEBUG_INSN_P (insn
))
16445 df_simulate_one_insn_backwards (bb
, insn
, &live
);
16449 CLEAR_REG_SET (&live
);
16452 /* Gcc puts the pool in the wrong place for ARM, since we can only
16453 load addresses a limited distance around the pc. We do some
16454 special munging to move the constant pool values to the correct
16455 point in the code. */
16460 HOST_WIDE_INT address
= 0;
16465 else if (TARGET_THUMB2
)
16468 /* Ensure all insns that must be split have been split at this point.
16469 Otherwise, the pool placement code below may compute incorrect
16470 insn lengths. Note that when optimizing, all insns have already
16471 been split at this point. */
16473 split_all_insns_noflow ();
16475 minipool_fix_head
= minipool_fix_tail
= NULL
;
16477 /* The first insn must always be a note, or the code below won't
16478 scan it properly. */
16479 insn
= get_insns ();
16480 gcc_assert (NOTE_P (insn
));
16483 /* Scan all the insns and record the operands that will need fixing. */
16484 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
16486 if (BARRIER_P (insn
))
16487 push_minipool_barrier (insn
, address
);
16488 else if (INSN_P (insn
))
16490 rtx_jump_table_data
*table
;
16492 note_invalid_constants (insn
, address
, true);
16493 address
+= get_attr_length (insn
);
16495 /* If the insn is a vector jump, add the size of the table
16496 and skip the table. */
16497 if (tablejump_p (insn
, NULL
, &table
))
16499 address
+= get_jump_table_size (table
);
16503 else if (LABEL_P (insn
))
16504 /* Add the worst-case padding due to alignment. We don't add
16505 the _current_ padding because the minipool insertions
16506 themselves might change it. */
16507 address
+= get_label_padding (insn
);
16510 fix
= minipool_fix_head
;
16512 /* Now scan the fixups and perform the required changes. */
16517 Mfix
* last_added_fix
;
16518 Mfix
* last_barrier
= NULL
;
16521 /* Skip any further barriers before the next fix. */
16522 while (fix
&& BARRIER_P (fix
->insn
))
16525 /* No more fixes. */
16529 last_added_fix
= NULL
;
16531 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
16533 if (BARRIER_P (ftmp
->insn
))
16535 if (ftmp
->address
>= minipool_vector_head
->max_address
)
16538 last_barrier
= ftmp
;
16540 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
16543 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
16546 /* If we found a barrier, drop back to that; any fixes that we
16547 could have reached but come after the barrier will now go in
16548 the next mini-pool. */
16549 if (last_barrier
!= NULL
)
16551 /* Reduce the refcount for those fixes that won't go into this
16553 for (fdel
= last_barrier
->next
;
16554 fdel
&& fdel
!= ftmp
;
16557 fdel
->minipool
->refcount
--;
16558 fdel
->minipool
= NULL
;
16561 ftmp
= last_barrier
;
16565 /* ftmp is first fix that we can't fit into this pool and
16566 there no natural barriers that we could use. Insert a
16567 new barrier in the code somewhere between the previous
16568 fix and this one, and arrange to jump around it. */
16569 HOST_WIDE_INT max_address
;
16571 /* The last item on the list of fixes must be a barrier, so
16572 we can never run off the end of the list of fixes without
16573 last_barrier being set. */
16576 max_address
= minipool_vector_head
->max_address
;
16577 /* Check that there isn't another fix that is in range that
16578 we couldn't fit into this pool because the pool was
16579 already too large: we need to put the pool before such an
16580 instruction. The pool itself may come just after the
16581 fix because create_fix_barrier also allows space for a
16582 jump instruction. */
16583 if (ftmp
->address
< max_address
)
16584 max_address
= ftmp
->address
+ 1;
16586 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
16589 assign_minipool_offsets (last_barrier
);
16593 if (!BARRIER_P (ftmp
->insn
)
16594 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
16601 /* Scan over the fixes we have identified for this pool, fixing them
16602 up and adding the constants to the pool itself. */
16603 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
16604 this_fix
= this_fix
->next
)
16605 if (!BARRIER_P (this_fix
->insn
))
16608 = plus_constant (Pmode
,
16609 gen_rtx_LABEL_REF (VOIDmode
,
16610 minipool_vector_label
),
16611 this_fix
->minipool
->offset
);
16612 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
16615 dump_minipool (last_barrier
->insn
);
16619 /* From now on we must synthesize any constants that we can't handle
16620 directly. This can happen if the RTL gets split during final
16621 instruction generation. */
16622 cfun
->machine
->after_arm_reorg
= 1;
16624 /* Free the minipool memory. */
16625 obstack_free (&minipool_obstack
, minipool_startobj
);
16628 /* Routines to output assembly language. */
16630 /* Return string representation of passed in real value. */
16631 static const char *
16632 fp_const_from_val (REAL_VALUE_TYPE
*r
)
16634 if (!fp_consts_inited
)
16637 gcc_assert (real_equal (r
, &value_fp0
));
16641 /* OPERANDS[0] is the entire list of insns that constitute pop,
16642 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
16643 is in the list, UPDATE is true iff the list contains explicit
16644 update of base register. */
16646 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
16652 const char *conditional
;
16653 int num_saves
= XVECLEN (operands
[0], 0);
16654 unsigned int regno
;
16655 unsigned int regno_base
= REGNO (operands
[1]);
16656 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
16659 offset
+= update
? 1 : 0;
16660 offset
+= return_pc
? 1 : 0;
16662 /* Is the base register in the list? */
16663 for (i
= offset
; i
< num_saves
; i
++)
16665 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
16666 /* If SP is in the list, then the base register must be SP. */
16667 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
16668 /* If base register is in the list, there must be no explicit update. */
16669 if (regno
== regno_base
)
16670 gcc_assert (!update
);
16673 conditional
= reverse
? "%?%D0" : "%?%d0";
16674 /* Can't use POP if returning from an interrupt. */
16675 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
16676 sprintf (pattern
, "pop%s\t{", conditional
);
16679 /* Output ldmfd when the base register is SP, otherwise output ldmia.
16680 It's just a convention, their semantics are identical. */
16681 if (regno_base
== SP_REGNUM
)
16682 sprintf (pattern
, "ldmfd%s\t", conditional
);
16684 sprintf (pattern
, "ldmia%s\t", conditional
);
16686 sprintf (pattern
, "ldm%s\t", conditional
);
16688 strcat (pattern
, reg_names
[regno_base
]);
16690 strcat (pattern
, "!, {");
16692 strcat (pattern
, ", {");
16695 /* Output the first destination register. */
16697 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
16699 /* Output the rest of the destination registers. */
16700 for (i
= offset
+ 1; i
< num_saves
; i
++)
16702 strcat (pattern
, ", ");
16704 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
16707 strcat (pattern
, "}");
16709 if (interrupt_p
&& return_pc
)
16710 strcat (pattern
, "^");
16712 output_asm_insn (pattern
, &cond
);
16716 /* Output the assembly for a store multiple. */
16719 vfp_output_vstmd (rtx
* operands
)
16725 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
16726 ? XEXP (operands
[0], 0)
16727 : XEXP (XEXP (operands
[0], 0), 0);
16728 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
16731 strcpy (pattern
, "vpush%?.64\t{%P1");
16733 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
16735 p
= strlen (pattern
);
16737 gcc_assert (REG_P (operands
[1]));
16739 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
16740 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
16742 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
16744 strcpy (&pattern
[p
], "}");
16746 output_asm_insn (pattern
, operands
);
16751 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
16752 number of bytes pushed. */
16755 vfp_emit_fstmd (int base_reg
, int count
)
16762 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
16763 register pairs are stored by a store multiple insn. We avoid this
16764 by pushing an extra pair. */
16765 if (count
== 2 && !arm_arch6
)
16767 if (base_reg
== LAST_VFP_REGNUM
- 3)
16772 /* FSTMD may not store more than 16 doubleword registers at once. Split
16773 larger stores into multiple parts (up to a maximum of two, in
16778 /* NOTE: base_reg is an internal register number, so each D register
16780 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
16781 saved
+= vfp_emit_fstmd (base_reg
, 16);
16785 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
16786 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
16788 reg
= gen_rtx_REG (DFmode
, base_reg
);
16791 XVECEXP (par
, 0, 0)
16792 = gen_rtx_SET (gen_frame_mem
16794 gen_rtx_PRE_MODIFY (Pmode
,
16797 (Pmode
, stack_pointer_rtx
,
16800 gen_rtx_UNSPEC (BLKmode
,
16801 gen_rtvec (1, reg
),
16802 UNSPEC_PUSH_MULT
));
16804 tmp
= gen_rtx_SET (stack_pointer_rtx
,
16805 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
16806 RTX_FRAME_RELATED_P (tmp
) = 1;
16807 XVECEXP (dwarf
, 0, 0) = tmp
;
16809 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
16810 RTX_FRAME_RELATED_P (tmp
) = 1;
16811 XVECEXP (dwarf
, 0, 1) = tmp
;
16813 for (i
= 1; i
< count
; i
++)
16815 reg
= gen_rtx_REG (DFmode
, base_reg
);
16817 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
16819 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
16820 plus_constant (Pmode
,
16824 RTX_FRAME_RELATED_P (tmp
) = 1;
16825 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
16828 par
= emit_insn (par
);
16829 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
16830 RTX_FRAME_RELATED_P (par
) = 1;
16835 /* Emit a call instruction with pattern PAT. ADDR is the address of
16836 the call target. */
16839 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
16843 insn
= emit_call_insn (pat
);
16845 /* The PIC register is live on entry to VxWorks PIC PLT entries.
16846 If the call might use such an entry, add a use of the PIC register
16847 to the instruction's CALL_INSN_FUNCTION_USAGE. */
16848 if (TARGET_VXWORKS_RTP
16851 && GET_CODE (addr
) == SYMBOL_REF
16852 && (SYMBOL_REF_DECL (addr
)
16853 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
16854 : !SYMBOL_REF_LOCAL_P (addr
)))
16856 require_pic_register ();
16857 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
16860 if (TARGET_AAPCS_BASED
)
16862 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
16863 linker. We need to add an IP clobber to allow setting
16864 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
16865 is not needed since it's a fixed register. */
16866 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
16867 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
16871 /* Output a 'call' insn. */
16873 output_call (rtx
*operands
)
16875 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
16877 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
16878 if (REGNO (operands
[0]) == LR_REGNUM
)
16880 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
16881 output_asm_insn ("mov%?\t%0, %|lr", operands
);
16884 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
16886 if (TARGET_INTERWORK
|| arm_arch4t
)
16887 output_asm_insn ("bx%?\t%0", operands
);
16889 output_asm_insn ("mov%?\t%|pc, %0", operands
);
16894 /* Output a move from arm registers to arm registers of a long double
16895 OPERANDS[0] is the destination.
16896 OPERANDS[1] is the source. */
16898 output_mov_long_double_arm_from_arm (rtx
*operands
)
16900 /* We have to be careful here because the two might overlap. */
16901 int dest_start
= REGNO (operands
[0]);
16902 int src_start
= REGNO (operands
[1]);
16906 if (dest_start
< src_start
)
16908 for (i
= 0; i
< 3; i
++)
16910 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
16911 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
16912 output_asm_insn ("mov%?\t%0, %1", ops
);
16917 for (i
= 2; i
>= 0; i
--)
16919 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
16920 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
16921 output_asm_insn ("mov%?\t%0, %1", ops
);
16929 arm_emit_movpair (rtx dest
, rtx src
)
16933 /* If the src is an immediate, simplify it. */
16934 if (CONST_INT_P (src
))
16936 HOST_WIDE_INT val
= INTVAL (src
);
16937 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
16938 if ((val
>> 16) & 0x0000ffff)
16940 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
16942 GEN_INT ((val
>> 16) & 0x0000ffff));
16943 insn
= get_last_insn ();
16944 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
16948 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
16949 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
16950 insn
= get_last_insn ();
16951 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
16954 /* Output a move between double words. It must be REG<-MEM
16957 output_move_double (rtx
*operands
, bool emit
, int *count
)
16959 enum rtx_code code0
= GET_CODE (operands
[0]);
16960 enum rtx_code code1
= GET_CODE (operands
[1]);
16965 /* The only case when this might happen is when
16966 you are looking at the length of a DImode instruction
16967 that has an invalid constant in it. */
16968 if (code0
== REG
&& code1
!= MEM
)
16970 gcc_assert (!emit
);
16977 unsigned int reg0
= REGNO (operands
[0]);
16979 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
16981 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
16983 switch (GET_CODE (XEXP (operands
[1], 0)))
16990 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
16991 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
16993 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
16998 gcc_assert (TARGET_LDRD
);
17000 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
17007 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
17009 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
17017 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
17019 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
17024 gcc_assert (TARGET_LDRD
);
17026 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
17031 /* Autoicrement addressing modes should never have overlapping
17032 base and destination registers, and overlapping index registers
17033 are already prohibited, so this doesn't need to worry about
17035 otherops
[0] = operands
[0];
17036 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17037 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17039 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17041 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17043 /* Registers overlap so split out the increment. */
17046 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17047 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
17054 /* Use a single insn if we can.
17055 FIXME: IWMMXT allows offsets larger than ldrd can
17056 handle, fix these up with a pair of ldr. */
17058 || !CONST_INT_P (otherops
[2])
17059 || (INTVAL (otherops
[2]) > -256
17060 && INTVAL (otherops
[2]) < 256))
17063 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
17069 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17070 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17080 /* Use a single insn if we can.
17081 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17082 fix these up with a pair of ldr. */
17084 || !CONST_INT_P (otherops
[2])
17085 || (INTVAL (otherops
[2]) > -256
17086 && INTVAL (otherops
[2]) < 256))
17089 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
17095 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17096 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
17106 /* We might be able to use ldrd %0, %1 here. However the range is
17107 different to ldr/adr, and it is broken on some ARMv7-M
17108 implementations. */
17109 /* Use the second register of the pair to avoid problematic
17111 otherops
[1] = operands
[1];
17113 output_asm_insn ("adr%?\t%0, %1", otherops
);
17114 operands
[1] = otherops
[0];
17118 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
17120 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
17127 /* ??? This needs checking for thumb2. */
17129 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
17130 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
17132 otherops
[0] = operands
[0];
17133 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
17134 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
17136 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
17138 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17140 switch ((int) INTVAL (otherops
[2]))
17144 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
17150 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
17156 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
17160 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
17161 operands
[1] = otherops
[0];
17163 && (REG_P (otherops
[2])
17165 || (CONST_INT_P (otherops
[2])
17166 && INTVAL (otherops
[2]) > -256
17167 && INTVAL (otherops
[2]) < 256)))
17169 if (reg_overlap_mentioned_p (operands
[0],
17172 /* Swap base and index registers over to
17173 avoid a conflict. */
17174 std::swap (otherops
[1], otherops
[2]);
17176 /* If both registers conflict, it will usually
17177 have been fixed by a splitter. */
17178 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
17179 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
17183 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17184 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
17191 otherops
[0] = operands
[0];
17193 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
17198 if (CONST_INT_P (otherops
[2]))
17202 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
17203 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
17205 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17211 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17217 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
17224 return "ldrd%?\t%0, [%1]";
17226 return "ldmia%?\t%1, %M0";
17230 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
17231 /* Take care of overlapping base/data reg. */
17232 if (reg_mentioned_p (operands
[0], operands
[1]))
17236 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17237 output_asm_insn ("ldr%?\t%0, %1", operands
);
17247 output_asm_insn ("ldr%?\t%0, %1", operands
);
17248 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17258 /* Constraints should ensure this. */
17259 gcc_assert (code0
== MEM
&& code1
== REG
);
17260 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
17261 || (TARGET_ARM
&& TARGET_LDRD
));
17263 switch (GET_CODE (XEXP (operands
[0], 0)))
17269 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
17271 output_asm_insn ("stm%?\t%m0, %M1", operands
);
17276 gcc_assert (TARGET_LDRD
);
17278 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
17285 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
17287 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
17295 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
17297 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
17302 gcc_assert (TARGET_LDRD
);
17304 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
17309 otherops
[0] = operands
[1];
17310 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
17311 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
17313 /* IWMMXT allows offsets larger than ldrd can handle,
17314 fix these up with a pair of ldr. */
17316 && CONST_INT_P (otherops
[2])
17317 && (INTVAL(otherops
[2]) <= -256
17318 || INTVAL(otherops
[2]) >= 256))
17320 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17324 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
17325 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17334 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17335 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
17341 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17344 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
17349 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
17354 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
17355 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17357 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
17361 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
17368 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
17375 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
17380 && (REG_P (otherops
[2])
17382 || (CONST_INT_P (otherops
[2])
17383 && INTVAL (otherops
[2]) > -256
17384 && INTVAL (otherops
[2]) < 256)))
17386 otherops
[0] = operands
[1];
17387 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
17389 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
17395 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
17396 otherops
[1] = operands
[1];
17399 output_asm_insn ("str%?\t%1, %0", operands
);
17400 output_asm_insn ("str%?\t%H1, %0", otherops
);
17410 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17411 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17414 output_move_quad (rtx
*operands
)
17416 if (REG_P (operands
[0]))
17418 /* Load, or reg->reg move. */
17420 if (MEM_P (operands
[1]))
17422 switch (GET_CODE (XEXP (operands
[1], 0)))
17425 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
17430 output_asm_insn ("adr%?\t%0, %1", operands
);
17431 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
17435 gcc_unreachable ();
17443 gcc_assert (REG_P (operands
[1]));
17445 dest
= REGNO (operands
[0]);
17446 src
= REGNO (operands
[1]);
17448 /* This seems pretty dumb, but hopefully GCC won't try to do it
17451 for (i
= 0; i
< 4; i
++)
17453 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
17454 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
17455 output_asm_insn ("mov%?\t%0, %1", ops
);
17458 for (i
= 3; i
>= 0; i
--)
17460 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
17461 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
17462 output_asm_insn ("mov%?\t%0, %1", ops
);
17468 gcc_assert (MEM_P (operands
[0]));
17469 gcc_assert (REG_P (operands
[1]));
17470 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
17472 switch (GET_CODE (XEXP (operands
[0], 0)))
17475 output_asm_insn ("stm%?\t%m0, %M1", operands
);
17479 gcc_unreachable ();
17486 /* Output a VFP load or store instruction. */
17489 output_move_vfp (rtx
*operands
)
17491 rtx reg
, mem
, addr
, ops
[2];
17492 int load
= REG_P (operands
[0]);
17493 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
17494 int sp
= (!TARGET_VFP_FP16INST
17495 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
17496 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
17501 reg
= operands
[!load
];
17502 mem
= operands
[load
];
17504 mode
= GET_MODE (reg
);
17506 gcc_assert (REG_P (reg
));
17507 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
17508 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
17514 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
17515 gcc_assert (MEM_P (mem
));
17517 addr
= XEXP (mem
, 0);
17519 switch (GET_CODE (addr
))
17522 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
17523 ops
[0] = XEXP (addr
, 0);
17528 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
17529 ops
[0] = XEXP (addr
, 0);
17534 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
17540 sprintf (buff
, templ
,
17541 load
? "ld" : "st",
17542 dp
? "64" : sp
? "32" : "16",
17544 integer_p
? "\t%@ int" : "");
17545 output_asm_insn (buff
, ops
);
17550 /* Output a Neon double-word or quad-word load or store, or a load
17551 or store for larger structure modes.
17553 WARNING: The ordering of elements is weird in big-endian mode,
17554 because the EABI requires that vectors stored in memory appear
17555 as though they were stored by a VSTM, as required by the EABI.
17556 GCC RTL defines element ordering based on in-memory order.
17557 This can be different from the architectural ordering of elements
17558 within a NEON register. The intrinsics defined in arm_neon.h use the
17559 NEON register element ordering, not the GCC RTL element ordering.
17561 For example, the in-memory ordering of a big-endian a quadword
17562 vector with 16-bit elements when stored from register pair {d0,d1}
17563 will be (lowest address first, d0[N] is NEON register element N):
17565 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
17567 When necessary, quadword registers (dN, dN+1) are moved to ARM
17568 registers from rN in the order:
17570 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
17572 So that STM/LDM can be used on vectors in ARM registers, and the
17573 same memory layout will result as if VSTM/VLDM were used.
17575 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
17576 possible, which allows use of appropriate alignment tags.
17577 Note that the choice of "64" is independent of the actual vector
17578 element size; this size simply ensures that the behavior is
17579 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
17581 Due to limitations of those instructions, use of VST1.64/VLD1.64
17582 is not possible if:
17583 - the address contains PRE_DEC, or
17584 - the mode refers to more than 4 double-word registers
17586 In those cases, it would be possible to replace VSTM/VLDM by a
17587 sequence of instructions; this is not currently implemented since
17588 this is not certain to actually improve performance. */
17591 output_move_neon (rtx
*operands
)
17593 rtx reg
, mem
, addr
, ops
[2];
17594 int regno
, nregs
, load
= REG_P (operands
[0]);
17599 reg
= operands
[!load
];
17600 mem
= operands
[load
];
17602 mode
= GET_MODE (reg
);
17604 gcc_assert (REG_P (reg
));
17605 regno
= REGNO (reg
);
17606 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
17607 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
17608 || NEON_REGNO_OK_FOR_QUAD (regno
));
17609 gcc_assert (VALID_NEON_DREG_MODE (mode
)
17610 || VALID_NEON_QREG_MODE (mode
)
17611 || VALID_NEON_STRUCT_MODE (mode
));
17612 gcc_assert (MEM_P (mem
));
17614 addr
= XEXP (mem
, 0);
17616 /* Strip off const from addresses like (const (plus (...))). */
17617 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
17618 addr
= XEXP (addr
, 0);
17620 switch (GET_CODE (addr
))
17623 /* We have to use vldm / vstm for too-large modes. */
17626 templ
= "v%smia%%?\t%%0!, %%h1";
17627 ops
[0] = XEXP (addr
, 0);
17631 templ
= "v%s1.64\t%%h1, %%A0";
17638 /* We have to use vldm / vstm in this case, since there is no
17639 pre-decrement form of the vld1 / vst1 instructions. */
17640 templ
= "v%smdb%%?\t%%0!, %%h1";
17641 ops
[0] = XEXP (addr
, 0);
17646 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
17647 gcc_unreachable ();
17650 /* We have to use vldm / vstm for too-large modes. */
17654 templ
= "v%smia%%?\t%%m0, %%h1";
17656 templ
= "v%s1.64\t%%h1, %%A0";
17662 /* Fall through. */
17668 for (i
= 0; i
< nregs
; i
++)
17670 /* We're only using DImode here because it's a convenient size. */
17671 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
17672 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
17673 if (reg_overlap_mentioned_p (ops
[0], mem
))
17675 gcc_assert (overlap
== -1);
17680 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
17681 output_asm_insn (buff
, ops
);
17686 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
17687 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
17688 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
17689 output_asm_insn (buff
, ops
);
17696 gcc_unreachable ();
17699 sprintf (buff
, templ
, load
? "ld" : "st");
17700 output_asm_insn (buff
, ops
);
17705 /* Compute and return the length of neon_mov<mode>, where <mode> is
17706 one of VSTRUCT modes: EI, OI, CI or XI. */
17708 arm_attr_length_move_neon (rtx_insn
*insn
)
17710 rtx reg
, mem
, addr
;
17714 extract_insn_cached (insn
);
17716 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
17718 mode
= GET_MODE (recog_data
.operand
[0]);
17729 gcc_unreachable ();
17733 load
= REG_P (recog_data
.operand
[0]);
17734 reg
= recog_data
.operand
[!load
];
17735 mem
= recog_data
.operand
[load
];
17737 gcc_assert (MEM_P (mem
));
17739 mode
= GET_MODE (reg
);
17740 addr
= XEXP (mem
, 0);
17742 /* Strip off const from addresses like (const (plus (...))). */
17743 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
17744 addr
= XEXP (addr
, 0);
17746 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
17748 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
17755 /* Return nonzero if the offset in the address is an immediate. Otherwise,
17759 arm_address_offset_is_imm (rtx_insn
*insn
)
17763 extract_insn_cached (insn
);
17765 if (REG_P (recog_data
.operand
[0]))
17768 mem
= recog_data
.operand
[0];
17770 gcc_assert (MEM_P (mem
));
17772 addr
= XEXP (mem
, 0);
17775 || (GET_CODE (addr
) == PLUS
17776 && REG_P (XEXP (addr
, 0))
17777 && CONST_INT_P (XEXP (addr
, 1))))
17783 /* Output an ADD r, s, #n where n may be too big for one instruction.
17784 If adding zero to one register, output nothing. */
17786 output_add_immediate (rtx
*operands
)
17788 HOST_WIDE_INT n
= INTVAL (operands
[2]);
17790 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
17793 output_multi_immediate (operands
,
17794 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
17797 output_multi_immediate (operands
,
17798 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
17805 /* Output a multiple immediate operation.
17806 OPERANDS is the vector of operands referred to in the output patterns.
17807 INSTR1 is the output pattern to use for the first constant.
17808 INSTR2 is the output pattern to use for subsequent constants.
17809 IMMED_OP is the index of the constant slot in OPERANDS.
17810 N is the constant value. */
17811 static const char *
17812 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
17813 int immed_op
, HOST_WIDE_INT n
)
17815 #if HOST_BITS_PER_WIDE_INT > 32
17821 /* Quick and easy output. */
17822 operands
[immed_op
] = const0_rtx
;
17823 output_asm_insn (instr1
, operands
);
17828 const char * instr
= instr1
;
17830 /* Note that n is never zero here (which would give no output). */
17831 for (i
= 0; i
< 32; i
+= 2)
17835 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
17836 output_asm_insn (instr
, operands
);
17846 /* Return the name of a shifter operation. */
17847 static const char *
17848 arm_shift_nmem(enum rtx_code code
)
17853 return ARM_LSL_NAME
;
17869 /* Return the appropriate ARM instruction for the operation code.
17870 The returned result should not be overwritten. OP is the rtx of the
17871 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
17874 arithmetic_instr (rtx op
, int shift_first_arg
)
17876 switch (GET_CODE (op
))
17882 return shift_first_arg
? "rsb" : "sub";
17897 return arm_shift_nmem(GET_CODE(op
));
17900 gcc_unreachable ();
17904 /* Ensure valid constant shifts and return the appropriate shift mnemonic
17905 for the operation code. The returned result should not be overwritten.
17906 OP is the rtx code of the shift.
17907 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
17909 static const char *
17910 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
17913 enum rtx_code code
= GET_CODE (op
);
17918 if (!CONST_INT_P (XEXP (op
, 1)))
17920 output_operand_lossage ("invalid shift operand");
17925 *amountp
= 32 - INTVAL (XEXP (op
, 1));
17933 mnem
= arm_shift_nmem(code
);
17934 if (CONST_INT_P (XEXP (op
, 1)))
17936 *amountp
= INTVAL (XEXP (op
, 1));
17938 else if (REG_P (XEXP (op
, 1)))
17945 output_operand_lossage ("invalid shift operand");
17951 /* We never have to worry about the amount being other than a
17952 power of 2, since this case can never be reloaded from a reg. */
17953 if (!CONST_INT_P (XEXP (op
, 1)))
17955 output_operand_lossage ("invalid shift operand");
17959 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
17961 /* Amount must be a power of two. */
17962 if (*amountp
& (*amountp
- 1))
17964 output_operand_lossage ("invalid shift operand");
17968 *amountp
= exact_log2 (*amountp
);
17969 gcc_assert (IN_RANGE (*amountp
, 0, 31));
17970 return ARM_LSL_NAME
;
17973 output_operand_lossage ("invalid shift operand");
17977 /* This is not 100% correct, but follows from the desire to merge
17978 multiplication by a power of 2 with the recognizer for a
17979 shift. >=32 is not a valid shift for "lsl", so we must try and
17980 output a shift that produces the correct arithmetical result.
17981 Using lsr #32 is identical except for the fact that the carry bit
17982 is not set correctly if we set the flags; but we never use the
17983 carry bit from such an operation, so we can ignore that. */
17984 if (code
== ROTATERT
)
17985 /* Rotate is just modulo 32. */
17987 else if (*amountp
!= (*amountp
& 31))
17989 if (code
== ASHIFT
)
17994 /* Shifts of 0 are no-ops. */
18001 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18002 because /bin/as is horribly restrictive. The judgement about
18003 whether or not each character is 'printable' (and can be output as
18004 is) or not (and must be printed with an octal escape) must be made
18005 with reference to the *host* character set -- the situation is
18006 similar to that discussed in the comments above pp_c_char in
18007 c-pretty-print.c. */
18009 #define MAX_ASCII_LEN 51
18012 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18015 int len_so_far
= 0;
18017 fputs ("\t.ascii\t\"", stream
);
18019 for (i
= 0; i
< len
; i
++)
18023 if (len_so_far
>= MAX_ASCII_LEN
)
18025 fputs ("\"\n\t.ascii\t\"", stream
);
18031 if (c
== '\\' || c
== '\"')
18033 putc ('\\', stream
);
18041 fprintf (stream
, "\\%03o", c
);
18046 fputs ("\"\n", stream
);
18049 /* Whether a register is callee saved or not. This is necessary because high
18050 registers are marked as caller saved when optimizing for size on Thumb-1
18051 targets despite being callee saved in order to avoid using them. */
18052 #define callee_saved_reg_p(reg) \
18053 (!call_used_regs[reg] \
18054 || (TARGET_THUMB1 && optimize_size \
18055 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18057 /* Compute the register save mask for registers 0 through 12
18058 inclusive. This code is used by arm_compute_save_reg_mask. */
18060 static unsigned long
18061 arm_compute_save_reg0_reg12_mask (void)
18063 unsigned long func_type
= arm_current_func_type ();
18064 unsigned long save_reg_mask
= 0;
18067 if (IS_INTERRUPT (func_type
))
18069 unsigned int max_reg
;
18070 /* Interrupt functions must not corrupt any registers,
18071 even call clobbered ones. If this is a leaf function
18072 we can just examine the registers used by the RTL, but
18073 otherwise we have to assume that whatever function is
18074 called might clobber anything, and so we have to save
18075 all the call-clobbered registers as well. */
18076 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18077 /* FIQ handlers have registers r8 - r12 banked, so
18078 we only need to check r0 - r7, Normal ISRs only
18079 bank r14 and r15, so we must check up to r12.
18080 r13 is the stack pointer which is always preserved,
18081 so we do not need to consider it here. */
18086 for (reg
= 0; reg
<= max_reg
; reg
++)
18087 if (df_regs_ever_live_p (reg
)
18088 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18089 save_reg_mask
|= (1 << reg
);
18091 /* Also save the pic base register if necessary. */
18093 && !TARGET_SINGLE_PIC_BASE
18094 && arm_pic_register
!= INVALID_REGNUM
18095 && crtl
->uses_pic_offset_table
)
18096 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18098 else if (IS_VOLATILE(func_type
))
18100 /* For noreturn functions we historically omitted register saves
18101 altogether. However this really messes up debugging. As a
18102 compromise save just the frame pointers. Combined with the link
18103 register saved elsewhere this should be sufficient to get
18105 if (frame_pointer_needed
)
18106 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18107 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
18108 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18109 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
18110 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
18114 /* In the normal case we only need to save those registers
18115 which are call saved and which are used by this function. */
18116 for (reg
= 0; reg
<= 11; reg
++)
18117 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
18118 save_reg_mask
|= (1 << reg
);
18120 /* Handle the frame pointer as a special case. */
18121 if (frame_pointer_needed
)
18122 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18124 /* If we aren't loading the PIC register,
18125 don't stack it even though it may be live. */
18127 && !TARGET_SINGLE_PIC_BASE
18128 && arm_pic_register
!= INVALID_REGNUM
18129 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
18130 || crtl
->uses_pic_offset_table
))
18131 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18133 /* The prologue will copy SP into R0, so save it. */
18134 if (IS_STACKALIGN (func_type
))
18135 save_reg_mask
|= 1;
18138 /* Save registers so the exception handler can modify them. */
18139 if (crtl
->calls_eh_return
)
18145 reg
= EH_RETURN_DATA_REGNO (i
);
18146 if (reg
== INVALID_REGNUM
)
18148 save_reg_mask
|= 1 << reg
;
18152 return save_reg_mask
;
18155 /* Return true if r3 is live at the start of the function. */
18158 arm_r3_live_at_start_p (void)
18160 /* Just look at cfg info, which is still close enough to correct at this
18161 point. This gives false positives for broken functions that might use
18162 uninitialized data that happens to be allocated in r3, but who cares? */
18163 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
18166 /* Compute the number of bytes used to store the static chain register on the
18167 stack, above the stack frame. We need to know this accurately to get the
18168 alignment of the rest of the stack frame correct. */
18171 arm_compute_static_chain_stack_bytes (void)
18173 /* See the defining assertion in arm_expand_prologue. */
18174 if (IS_NESTED (arm_current_func_type ())
18175 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
18176 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
18177 && !df_regs_ever_live_p (LR_REGNUM
)))
18178 && arm_r3_live_at_start_p ()
18179 && crtl
->args
.pretend_args_size
== 0)
18185 /* Compute a bit mask of which registers need to be
18186 saved on the stack for the current function.
18187 This is used by arm_get_frame_offsets, which may add extra registers. */
18189 static unsigned long
18190 arm_compute_save_reg_mask (void)
18192 unsigned int save_reg_mask
= 0;
18193 unsigned long func_type
= arm_current_func_type ();
18196 if (IS_NAKED (func_type
))
18197 /* This should never really happen. */
18200 /* If we are creating a stack frame, then we must save the frame pointer,
18201 IP (which will hold the old stack pointer), LR and the PC. */
18202 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
18204 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
18207 | (1 << PC_REGNUM
);
18209 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
18211 /* Decide if we need to save the link register.
18212 Interrupt routines have their own banked link register,
18213 so they never need to save it.
18214 Otherwise if we do not use the link register we do not need to save
18215 it. If we are pushing other registers onto the stack however, we
18216 can save an instruction in the epilogue by pushing the link register
18217 now and then popping it back into the PC. This incurs extra memory
18218 accesses though, so we only do it when optimizing for size, and only
18219 if we know that we will not need a fancy return sequence. */
18220 if (df_regs_ever_live_p (LR_REGNUM
)
18223 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
18224 && !crtl
->tail_call_emit
18225 && !crtl
->calls_eh_return
))
18226 save_reg_mask
|= 1 << LR_REGNUM
;
18228 if (cfun
->machine
->lr_save_eliminated
)
18229 save_reg_mask
&= ~ (1 << LR_REGNUM
);
18231 if (TARGET_REALLY_IWMMXT
18232 && ((bit_count (save_reg_mask
)
18233 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
18234 arm_compute_static_chain_stack_bytes())
18237 /* The total number of registers that are going to be pushed
18238 onto the stack is odd. We need to ensure that the stack
18239 is 64-bit aligned before we start to save iWMMXt registers,
18240 and also before we start to create locals. (A local variable
18241 might be a double or long long which we will load/store using
18242 an iWMMXt instruction). Therefore we need to push another
18243 ARM register, so that the stack will be 64-bit aligned. We
18244 try to avoid using the arg registers (r0 -r3) as they might be
18245 used to pass values in a tail call. */
18246 for (reg
= 4; reg
<= 12; reg
++)
18247 if ((save_reg_mask
& (1 << reg
)) == 0)
18251 save_reg_mask
|= (1 << reg
);
18254 cfun
->machine
->sibcall_blocked
= 1;
18255 save_reg_mask
|= (1 << 3);
18259 /* We may need to push an additional register for use initializing the
18260 PIC base register. */
18261 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
18262 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
18264 reg
= thumb_find_work_register (1 << 4);
18265 if (!call_used_regs
[reg
])
18266 save_reg_mask
|= (1 << reg
);
18269 return save_reg_mask
;
18272 /* Compute a bit mask of which registers need to be
18273 saved on the stack for the current function. */
18274 static unsigned long
18275 thumb1_compute_save_reg_mask (void)
18277 unsigned long mask
;
18281 for (reg
= 0; reg
< 12; reg
++)
18282 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
18286 && !TARGET_SINGLE_PIC_BASE
18287 && arm_pic_register
!= INVALID_REGNUM
18288 && crtl
->uses_pic_offset_table
)
18289 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18291 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18292 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
18293 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18295 /* LR will also be pushed if any lo regs are pushed. */
18296 if (mask
& 0xff || thumb_force_lr_save ())
18297 mask
|= (1 << LR_REGNUM
);
18299 /* Make sure we have a low work register if we need one.
18300 We will need one if we are going to push a high register,
18301 but we are not currently intending to push a low register. */
18302 if ((mask
& 0xff) == 0
18303 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
18305 /* Use thumb_find_work_register to choose which register
18306 we will use. If the register is live then we will
18307 have to push it. Use LAST_LO_REGNUM as our fallback
18308 choice for the register to select. */
18309 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
18310 /* Make sure the register returned by thumb_find_work_register is
18311 not part of the return value. */
18312 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
18313 reg
= LAST_LO_REGNUM
;
18315 if (callee_saved_reg_p (reg
))
18319 /* The 504 below is 8 bytes less than 512 because there are two possible
18320 alignment words. We can't tell here if they will be present or not so we
18321 have to play it safe and assume that they are. */
18322 if ((CALLER_INTERWORKING_SLOT_SIZE
+
18323 ROUND_UP_WORD (get_frame_size ()) +
18324 crtl
->outgoing_args_size
) >= 504)
18326 /* This is the same as the code in thumb1_expand_prologue() which
18327 determines which register to use for stack decrement. */
18328 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
18329 if (mask
& (1 << reg
))
18332 if (reg
> LAST_LO_REGNUM
)
18334 /* Make sure we have a register available for stack decrement. */
18335 mask
|= 1 << LAST_LO_REGNUM
;
18343 /* Return the number of bytes required to save VFP registers. */
18345 arm_get_vfp_saved_size (void)
18347 unsigned int regno
;
18352 /* Space for saved VFP registers. */
18353 if (TARGET_HARD_FLOAT
)
18356 for (regno
= FIRST_VFP_REGNUM
;
18357 regno
< LAST_VFP_REGNUM
;
18360 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
18361 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
18365 /* Workaround ARM10 VFPr1 bug. */
18366 if (count
== 2 && !arm_arch6
)
18368 saved
+= count
* 8;
18377 if (count
== 2 && !arm_arch6
)
18379 saved
+= count
* 8;
18386 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18387 everything bar the final return instruction. If simple_return is true,
18388 then do not output epilogue, because it has already been emitted in RTL. */
18390 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
18391 bool simple_return
)
18393 char conditional
[10];
18396 unsigned long live_regs_mask
;
18397 unsigned long func_type
;
18398 arm_stack_offsets
*offsets
;
18400 func_type
= arm_current_func_type ();
18402 if (IS_NAKED (func_type
))
18405 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
18407 /* If this function was declared non-returning, and we have
18408 found a tail call, then we have to trust that the called
18409 function won't return. */
18414 /* Otherwise, trap an attempted return by aborting. */
18416 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
18418 assemble_external_libcall (ops
[1]);
18419 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
18425 gcc_assert (!cfun
->calls_alloca
|| really_return
);
18427 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
18429 cfun
->machine
->return_used_this_function
= 1;
18431 offsets
= arm_get_frame_offsets ();
18432 live_regs_mask
= offsets
->saved_regs_mask
;
18434 if (!simple_return
&& live_regs_mask
)
18436 const char * return_reg
;
18438 /* If we do not have any special requirements for function exit
18439 (e.g. interworking) then we can load the return address
18440 directly into the PC. Otherwise we must load it into LR. */
18442 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
18443 return_reg
= reg_names
[PC_REGNUM
];
18445 return_reg
= reg_names
[LR_REGNUM
];
18447 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
18449 /* There are three possible reasons for the IP register
18450 being saved. 1) a stack frame was created, in which case
18451 IP contains the old stack pointer, or 2) an ISR routine
18452 corrupted it, or 3) it was saved to align the stack on
18453 iWMMXt. In case 1, restore IP into SP, otherwise just
18455 if (frame_pointer_needed
)
18457 live_regs_mask
&= ~ (1 << IP_REGNUM
);
18458 live_regs_mask
|= (1 << SP_REGNUM
);
18461 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
18464 /* On some ARM architectures it is faster to use LDR rather than
18465 LDM to load a single register. On other architectures, the
18466 cost is the same. In 26 bit mode, or for exception handlers,
18467 we have to use LDM to load the PC so that the CPSR is also
18469 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
18470 if (live_regs_mask
== (1U << reg
))
18473 if (reg
<= LAST_ARM_REGNUM
18474 && (reg
!= LR_REGNUM
18476 || ! IS_INTERRUPT (func_type
)))
18478 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
18479 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
18486 /* Generate the load multiple instruction to restore the
18487 registers. Note we can get here, even if
18488 frame_pointer_needed is true, but only if sp already
18489 points to the base of the saved core registers. */
18490 if (live_regs_mask
& (1 << SP_REGNUM
))
18492 unsigned HOST_WIDE_INT stack_adjust
;
18494 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
18495 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
18497 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
18498 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
18501 /* If we can't use ldmib (SA110 bug),
18502 then try to pop r3 instead. */
18504 live_regs_mask
|= 1 << 3;
18506 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
18509 /* For interrupt returns we have to use an LDM rather than
18510 a POP so that we can use the exception return variant. */
18511 else if (IS_INTERRUPT (func_type
))
18512 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
18514 sprintf (instr
, "pop%s\t{", conditional
);
18516 p
= instr
+ strlen (instr
);
18518 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
18519 if (live_regs_mask
& (1 << reg
))
18521 int l
= strlen (reg_names
[reg
]);
18527 memcpy (p
, ", ", 2);
18531 memcpy (p
, "%|", 2);
18532 memcpy (p
+ 2, reg_names
[reg
], l
);
18536 if (live_regs_mask
& (1 << LR_REGNUM
))
18538 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
18539 /* If returning from an interrupt, restore the CPSR. */
18540 if (IS_INTERRUPT (func_type
))
18547 output_asm_insn (instr
, & operand
);
18549 /* See if we need to generate an extra instruction to
18550 perform the actual function return. */
18552 && func_type
!= ARM_FT_INTERWORKED
18553 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
18555 /* The return has already been handled
18556 by loading the LR into the PC. */
18563 switch ((int) ARM_FUNC_TYPE (func_type
))
18567 /* ??? This is wrong for unified assembly syntax. */
18568 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
18571 case ARM_FT_INTERWORKED
:
18572 gcc_assert (arm_arch5
|| arm_arch4t
);
18573 sprintf (instr
, "bx%s\t%%|lr", conditional
);
18576 case ARM_FT_EXCEPTION
:
18577 /* ??? This is wrong for unified assembly syntax. */
18578 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
18582 /* Use bx if it's available. */
18583 if (arm_arch5
|| arm_arch4t
)
18584 sprintf (instr
, "bx%s\t%%|lr", conditional
);
18586 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
18590 output_asm_insn (instr
, & operand
);
18596 /* Write the function name into the code section, directly preceding
18597 the function prologue.
18599 Code will be output similar to this:
18601 .ascii "arm_poke_function_name", 0
18604 .word 0xff000000 + (t1 - t0)
18605 arm_poke_function_name
18607 stmfd sp!, {fp, ip, lr, pc}
18610 When performing a stack backtrace, code can inspect the value
18611 of 'pc' stored at 'fp' + 0. If the trace function then looks
18612 at location pc - 12 and the top 8 bits are set, then we know
18613 that there is a function name embedded immediately preceding this
18614 location and has length ((pc[-3]) & 0xff000000).
18616 We assume that pc is declared as a pointer to an unsigned long.
18618 It is of no benefit to output the function name if we are assembling
18619 a leaf function. These function types will not contain a stack
18620 backtrace structure, therefore it is not possible to determine the
18623 arm_poke_function_name (FILE *stream
, const char *name
)
18625 unsigned long alignlength
;
18626 unsigned long length
;
18629 length
= strlen (name
) + 1;
18630 alignlength
= ROUND_UP_WORD (length
);
18632 ASM_OUTPUT_ASCII (stream
, name
, length
);
18633 ASM_OUTPUT_ALIGN (stream
, 2);
18634 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
18635 assemble_aligned_integer (UNITS_PER_WORD
, x
);
18638 /* Place some comments into the assembler stream
18639 describing the current function. */
18641 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
18643 unsigned long func_type
;
18645 /* ??? Do we want to print some of the below anyway? */
18649 /* Sanity check. */
18650 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
18652 func_type
= arm_current_func_type ();
18654 switch ((int) ARM_FUNC_TYPE (func_type
))
18657 case ARM_FT_NORMAL
:
18659 case ARM_FT_INTERWORKED
:
18660 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
18663 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
18666 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
18668 case ARM_FT_EXCEPTION
:
18669 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
18673 if (IS_NAKED (func_type
))
18674 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
18676 if (IS_VOLATILE (func_type
))
18677 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
18679 if (IS_NESTED (func_type
))
18680 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
18681 if (IS_STACKALIGN (func_type
))
18682 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
18684 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
18686 crtl
->args
.pretend_args_size
, frame_size
);
18688 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
18689 frame_pointer_needed
,
18690 cfun
->machine
->uses_anonymous_args
);
18692 if (cfun
->machine
->lr_save_eliminated
)
18693 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
18695 if (crtl
->calls_eh_return
)
18696 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
18701 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
18702 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
18704 arm_stack_offsets
*offsets
;
18710 /* Emit any call-via-reg trampolines that are needed for v4t support
18711 of call_reg and call_value_reg type insns. */
18712 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
18714 rtx label
= cfun
->machine
->call_via
[regno
];
18718 switch_to_section (function_section (current_function_decl
));
18719 targetm
.asm_out
.internal_label (asm_out_file
, "L",
18720 CODE_LABEL_NUMBER (label
));
18721 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
18725 /* ??? Probably not safe to set this here, since it assumes that a
18726 function will be emitted as assembly immediately after we generate
18727 RTL for it. This does not happen for inline functions. */
18728 cfun
->machine
->return_used_this_function
= 0;
18730 else /* TARGET_32BIT */
18732 /* We need to take into account any stack-frame rounding. */
18733 offsets
= arm_get_frame_offsets ();
18735 gcc_assert (!use_return_insn (FALSE
, NULL
)
18736 || (cfun
->machine
->return_used_this_function
!= 0)
18737 || offsets
->saved_regs
== offsets
->outgoing_args
18738 || frame_pointer_needed
);
18742 /* Generate and emit a sequence of insns equivalent to PUSH, but using
18743 STR and STRD. If an even number of registers are being pushed, one
18744 or more STRD patterns are created for each register pair. If an
18745 odd number of registers are pushed, emit an initial STR followed by
18746 as many STRD instructions as are needed. This works best when the
18747 stack is initially 64-bit aligned (the normal case), since it
18748 ensures that each STRD is also 64-bit aligned. */
18750 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
18755 rtx par
= NULL_RTX
;
18756 rtx dwarf
= NULL_RTX
;
18760 num_regs
= bit_count (saved_regs_mask
);
18762 /* Must be at least one register to save, and can't save SP or PC. */
18763 gcc_assert (num_regs
> 0 && num_regs
<= 14);
18764 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
18765 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
18767 /* Create sequence for DWARF info. All the frame-related data for
18768 debugging is held in this wrapper. */
18769 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
18771 /* Describe the stack adjustment. */
18772 tmp
= gen_rtx_SET (stack_pointer_rtx
,
18773 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
18774 RTX_FRAME_RELATED_P (tmp
) = 1;
18775 XVECEXP (dwarf
, 0, 0) = tmp
;
18777 /* Find the first register. */
18778 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
18783 /* If there's an odd number of registers to push. Start off by
18784 pushing a single register. This ensures that subsequent strd
18785 operations are dword aligned (assuming that SP was originally
18786 64-bit aligned). */
18787 if ((num_regs
& 1) != 0)
18789 rtx reg
, mem
, insn
;
18791 reg
= gen_rtx_REG (SImode
, regno
);
18793 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
18794 stack_pointer_rtx
));
18796 mem
= gen_frame_mem (Pmode
,
18798 (Pmode
, stack_pointer_rtx
,
18799 plus_constant (Pmode
, stack_pointer_rtx
,
18802 tmp
= gen_rtx_SET (mem
, reg
);
18803 RTX_FRAME_RELATED_P (tmp
) = 1;
18804 insn
= emit_insn (tmp
);
18805 RTX_FRAME_RELATED_P (insn
) = 1;
18806 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
18807 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
18808 RTX_FRAME_RELATED_P (tmp
) = 1;
18811 XVECEXP (dwarf
, 0, i
) = tmp
;
18815 while (i
< num_regs
)
18816 if (saved_regs_mask
& (1 << regno
))
18818 rtx reg1
, reg2
, mem1
, mem2
;
18819 rtx tmp0
, tmp1
, tmp2
;
18822 /* Find the register to pair with this one. */
18823 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
18827 reg1
= gen_rtx_REG (SImode
, regno
);
18828 reg2
= gen_rtx_REG (SImode
, regno2
);
18835 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18838 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18840 -4 * (num_regs
- 1)));
18841 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
18842 plus_constant (Pmode
, stack_pointer_rtx
,
18844 tmp1
= gen_rtx_SET (mem1
, reg1
);
18845 tmp2
= gen_rtx_SET (mem2
, reg2
);
18846 RTX_FRAME_RELATED_P (tmp0
) = 1;
18847 RTX_FRAME_RELATED_P (tmp1
) = 1;
18848 RTX_FRAME_RELATED_P (tmp2
) = 1;
18849 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
18850 XVECEXP (par
, 0, 0) = tmp0
;
18851 XVECEXP (par
, 0, 1) = tmp1
;
18852 XVECEXP (par
, 0, 2) = tmp2
;
18853 insn
= emit_insn (par
);
18854 RTX_FRAME_RELATED_P (insn
) = 1;
18855 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
18859 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18862 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18865 tmp1
= gen_rtx_SET (mem1
, reg1
);
18866 tmp2
= gen_rtx_SET (mem2
, reg2
);
18867 RTX_FRAME_RELATED_P (tmp1
) = 1;
18868 RTX_FRAME_RELATED_P (tmp2
) = 1;
18869 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
18870 XVECEXP (par
, 0, 0) = tmp1
;
18871 XVECEXP (par
, 0, 1) = tmp2
;
18875 /* Create unwind information. This is an approximation. */
18876 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
18877 plus_constant (Pmode
,
18881 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
18882 plus_constant (Pmode
,
18887 RTX_FRAME_RELATED_P (tmp1
) = 1;
18888 RTX_FRAME_RELATED_P (tmp2
) = 1;
18889 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
18890 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
18892 regno
= regno2
+ 1;
18900 /* STRD in ARM mode requires consecutive registers. This function emits STRD
18901 whenever possible, otherwise it emits single-word stores. The first store
18902 also allocates stack space for all saved registers, using writeback with
18903 post-addressing mode. All other stores use offset addressing. If no STRD
18904 can be emitted, this function emits a sequence of single-word stores,
18905 and not an STM as before, because single-word stores provide more freedom
18906 scheduling and can be turned into an STM by peephole optimizations. */
18908 arm_emit_strd_push (unsigned long saved_regs_mask
)
18911 int i
, j
, dwarf_index
= 0;
18913 rtx dwarf
= NULL_RTX
;
18914 rtx insn
= NULL_RTX
;
18917 /* TODO: A more efficient code can be emitted by changing the
18918 layout, e.g., first push all pairs that can use STRD to keep the
18919 stack aligned, and then push all other registers. */
18920 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
18921 if (saved_regs_mask
& (1 << i
))
18924 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
18925 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
18926 gcc_assert (num_regs
> 0);
18928 /* Create sequence for DWARF info. */
18929 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
18931 /* For dwarf info, we generate explicit stack update. */
18932 tmp
= gen_rtx_SET (stack_pointer_rtx
,
18933 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
18934 RTX_FRAME_RELATED_P (tmp
) = 1;
18935 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
18937 /* Save registers. */
18938 offset
= - 4 * num_regs
;
18940 while (j
<= LAST_ARM_REGNUM
)
18941 if (saved_regs_mask
& (1 << j
))
18944 && (saved_regs_mask
& (1 << (j
+ 1))))
18946 /* Current register and previous register form register pair for
18947 which STRD can be generated. */
18950 /* Allocate stack space for all saved registers. */
18951 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
18952 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
18953 mem
= gen_frame_mem (DImode
, tmp
);
18956 else if (offset
> 0)
18957 mem
= gen_frame_mem (DImode
,
18958 plus_constant (Pmode
,
18962 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
18964 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
18965 RTX_FRAME_RELATED_P (tmp
) = 1;
18966 tmp
= emit_insn (tmp
);
18968 /* Record the first store insn. */
18969 if (dwarf_index
== 1)
18972 /* Generate dwarf info. */
18973 mem
= gen_frame_mem (SImode
,
18974 plus_constant (Pmode
,
18977 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
18978 RTX_FRAME_RELATED_P (tmp
) = 1;
18979 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
18981 mem
= gen_frame_mem (SImode
,
18982 plus_constant (Pmode
,
18985 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
18986 RTX_FRAME_RELATED_P (tmp
) = 1;
18987 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
18994 /* Emit a single word store. */
18997 /* Allocate stack space for all saved registers. */
18998 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
18999 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19000 mem
= gen_frame_mem (SImode
, tmp
);
19003 else if (offset
> 0)
19004 mem
= gen_frame_mem (SImode
,
19005 plus_constant (Pmode
,
19009 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19011 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19012 RTX_FRAME_RELATED_P (tmp
) = 1;
19013 tmp
= emit_insn (tmp
);
19015 /* Record the first store insn. */
19016 if (dwarf_index
== 1)
19019 /* Generate dwarf info. */
19020 mem
= gen_frame_mem (SImode
,
19021 plus_constant(Pmode
,
19024 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19025 RTX_FRAME_RELATED_P (tmp
) = 1;
19026 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19035 /* Attach dwarf info to the first insn we generate. */
19036 gcc_assert (insn
!= NULL_RTX
);
19037 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19038 RTX_FRAME_RELATED_P (insn
) = 1;
19041 /* Generate and emit an insn that we will recognize as a push_multi.
19042 Unfortunately, since this insn does not reflect very well the actual
19043 semantics of the operation, we need to annotate the insn for the benefit
19044 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19045 MASK for registers that should be annotated for DWARF2 frame unwind
19048 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
19051 int num_dwarf_regs
= 0;
19055 int dwarf_par_index
;
19058 /* We don't record the PC in the dwarf frame information. */
19059 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
19061 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19063 if (mask
& (1 << i
))
19065 if (dwarf_regs_mask
& (1 << i
))
19069 gcc_assert (num_regs
&& num_regs
<= 16);
19070 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
19072 /* For the body of the insn we are going to generate an UNSPEC in
19073 parallel with several USEs. This allows the insn to be recognized
19074 by the push_multi pattern in the arm.md file.
19076 The body of the insn looks something like this:
19079 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19080 (const_int:SI <num>)))
19081 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19087 For the frame note however, we try to be more explicit and actually
19088 show each register being stored into the stack frame, plus a (single)
19089 decrement of the stack pointer. We do it this way in order to be
19090 friendly to the stack unwinding code, which only wants to see a single
19091 stack decrement per instruction. The RTL we generate for the note looks
19092 something like this:
19095 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19096 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19097 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19098 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19102 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19103 instead we'd have a parallel expression detailing all
19104 the stores to the various memory addresses so that debug
19105 information is more up-to-date. Remember however while writing
19106 this to take care of the constraints with the push instruction.
19108 Note also that this has to be taken care of for the VFP registers.
19110 For more see PR43399. */
19112 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
19113 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
19114 dwarf_par_index
= 1;
19116 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19118 if (mask
& (1 << i
))
19120 reg
= gen_rtx_REG (SImode
, i
);
19122 XVECEXP (par
, 0, 0)
19123 = gen_rtx_SET (gen_frame_mem
19125 gen_rtx_PRE_MODIFY (Pmode
,
19128 (Pmode
, stack_pointer_rtx
,
19131 gen_rtx_UNSPEC (BLKmode
,
19132 gen_rtvec (1, reg
),
19133 UNSPEC_PUSH_MULT
));
19135 if (dwarf_regs_mask
& (1 << i
))
19137 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
19139 RTX_FRAME_RELATED_P (tmp
) = 1;
19140 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19147 for (j
= 1, i
++; j
< num_regs
; i
++)
19149 if (mask
& (1 << i
))
19151 reg
= gen_rtx_REG (SImode
, i
);
19153 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
19155 if (dwarf_regs_mask
& (1 << i
))
19158 = gen_rtx_SET (gen_frame_mem
19160 plus_constant (Pmode
, stack_pointer_rtx
,
19163 RTX_FRAME_RELATED_P (tmp
) = 1;
19164 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19171 par
= emit_insn (par
);
19173 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19174 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19175 RTX_FRAME_RELATED_P (tmp
) = 1;
19176 XVECEXP (dwarf
, 0, 0) = tmp
;
19178 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19183 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19184 SIZE is the offset to be adjusted.
19185 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19187 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
19191 RTX_FRAME_RELATED_P (insn
) = 1;
19192 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
19193 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
19196 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19197 SAVED_REGS_MASK shows which registers need to be restored.
19199 Unfortunately, since this insn does not reflect very well the actual
19200 semantics of the operation, we need to annotate the insn for the benefit
19201 of DWARF2 frame unwind information. */
19203 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
19208 rtx dwarf
= NULL_RTX
;
19210 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
19214 offset_adj
= return_in_pc
? 1 : 0;
19215 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19216 if (saved_regs_mask
& (1 << i
))
19219 gcc_assert (num_regs
&& num_regs
<= 16);
19221 /* If SP is in reglist, then we don't emit SP update insn. */
19222 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
19224 /* The parallel needs to hold num_regs SETs
19225 and one SET for the stack update. */
19226 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
19229 XVECEXP (par
, 0, 0) = ret_rtx
;
19233 /* Increment the stack pointer, based on there being
19234 num_regs 4-byte registers to restore. */
19235 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19236 plus_constant (Pmode
,
19239 RTX_FRAME_RELATED_P (tmp
) = 1;
19240 XVECEXP (par
, 0, offset_adj
) = tmp
;
19243 /* Now restore every reg, which may include PC. */
19244 for (j
= 0, i
= 0; j
< num_regs
; i
++)
19245 if (saved_regs_mask
& (1 << i
))
19247 reg
= gen_rtx_REG (SImode
, i
);
19248 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
19250 /* Emit single load with writeback. */
19251 tmp
= gen_frame_mem (SImode
,
19252 gen_rtx_POST_INC (Pmode
,
19253 stack_pointer_rtx
));
19254 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
19255 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19259 tmp
= gen_rtx_SET (reg
,
19262 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
19263 RTX_FRAME_RELATED_P (tmp
) = 1;
19264 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
19266 /* We need to maintain a sequence for DWARF info too. As dwarf info
19267 should not have PC, skip PC. */
19268 if (i
!= PC_REGNUM
)
19269 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19275 par
= emit_jump_insn (par
);
19277 par
= emit_insn (par
);
19279 REG_NOTES (par
) = dwarf
;
19281 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
19282 stack_pointer_rtx
, stack_pointer_rtx
);
19285 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19286 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19288 Unfortunately, since this insn does not reflect very well the actual
19289 semantics of the operation, we need to annotate the insn for the benefit
19290 of DWARF2 frame unwind information. */
19292 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
19296 rtx dwarf
= NULL_RTX
;
19299 gcc_assert (num_regs
&& num_regs
<= 32);
19301 /* Workaround ARM10 VFPr1 bug. */
19302 if (num_regs
== 2 && !arm_arch6
)
19304 if (first_reg
== 15)
19310 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19311 there could be up to 32 D-registers to restore.
19312 If there are more than 16 D-registers, make two recursive calls,
19313 each of which emits one pop_multi instruction. */
19316 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
19317 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
19321 /* The parallel needs to hold num_regs SETs
19322 and one SET for the stack update. */
19323 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19325 /* Increment the stack pointer, based on there being
19326 num_regs 8-byte registers to restore. */
19327 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
19328 RTX_FRAME_RELATED_P (tmp
) = 1;
19329 XVECEXP (par
, 0, 0) = tmp
;
19331 /* Now show every reg that will be restored, using a SET for each. */
19332 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
19334 reg
= gen_rtx_REG (DFmode
, i
);
19336 tmp
= gen_rtx_SET (reg
,
19339 plus_constant (Pmode
, base_reg
, 8 * j
)));
19340 RTX_FRAME_RELATED_P (tmp
) = 1;
19341 XVECEXP (par
, 0, j
+ 1) = tmp
;
19343 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19348 par
= emit_insn (par
);
19349 REG_NOTES (par
) = dwarf
;
19351 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
19352 if (REGNO (base_reg
) == IP_REGNUM
)
19354 RTX_FRAME_RELATED_P (par
) = 1;
19355 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
19358 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
19359 base_reg
, base_reg
);
19362 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
19363 number of registers are being popped, multiple LDRD patterns are created for
19364 all register pairs. If odd number of registers are popped, last register is
19365 loaded by using LDR pattern. */
19367 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
19371 rtx par
= NULL_RTX
;
19372 rtx dwarf
= NULL_RTX
;
19373 rtx tmp
, reg
, tmp1
;
19374 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
19376 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19377 if (saved_regs_mask
& (1 << i
))
19380 gcc_assert (num_regs
&& num_regs
<= 16);
19382 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19383 to be popped. So, if num_regs is even, now it will become odd,
19384 and we can generate pop with PC. If num_regs is odd, it will be
19385 even now, and ldr with return can be generated for PC. */
19389 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19391 /* Var j iterates over all the registers to gather all the registers in
19392 saved_regs_mask. Var i gives index of saved registers in stack frame.
19393 A PARALLEL RTX of register-pair is created here, so that pattern for
19394 LDRD can be matched. As PC is always last register to be popped, and
19395 we have already decremented num_regs if PC, we don't have to worry
19396 about PC in this loop. */
19397 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
19398 if (saved_regs_mask
& (1 << j
))
19400 /* Create RTX for memory load. */
19401 reg
= gen_rtx_REG (SImode
, j
);
19402 tmp
= gen_rtx_SET (reg
,
19403 gen_frame_mem (SImode
,
19404 plus_constant (Pmode
,
19405 stack_pointer_rtx
, 4 * i
)));
19406 RTX_FRAME_RELATED_P (tmp
) = 1;
19410 /* When saved-register index (i) is even, the RTX to be emitted is
19411 yet to be created. Hence create it first. The LDRD pattern we
19412 are generating is :
19413 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19414 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19415 where target registers need not be consecutive. */
19416 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19420 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19421 added as 0th element and if i is odd, reg_i is added as 1st element
19422 of LDRD pattern shown above. */
19423 XVECEXP (par
, 0, (i
% 2)) = tmp
;
19424 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19428 /* When saved-register index (i) is odd, RTXs for both the registers
19429 to be loaded are generated in above given LDRD pattern, and the
19430 pattern can be emitted now. */
19431 par
= emit_insn (par
);
19432 REG_NOTES (par
) = dwarf
;
19433 RTX_FRAME_RELATED_P (par
) = 1;
19439 /* If the number of registers pushed is odd AND return_in_pc is false OR
19440 number of registers are even AND return_in_pc is true, last register is
19441 popped using LDR. It can be PC as well. Hence, adjust the stack first and
19442 then LDR with post increment. */
19444 /* Increment the stack pointer, based on there being
19445 num_regs 4-byte registers to restore. */
19446 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19447 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
19448 RTX_FRAME_RELATED_P (tmp
) = 1;
19449 tmp
= emit_insn (tmp
);
19452 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
19453 stack_pointer_rtx
, stack_pointer_rtx
);
19458 if (((num_regs
% 2) == 1 && !return_in_pc
)
19459 || ((num_regs
% 2) == 0 && return_in_pc
))
19461 /* Scan for the single register to be popped. Skip until the saved
19462 register is found. */
19463 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
19465 /* Gen LDR with post increment here. */
19466 tmp1
= gen_rtx_MEM (SImode
,
19467 gen_rtx_POST_INC (SImode
,
19468 stack_pointer_rtx
));
19469 set_mem_alias_set (tmp1
, get_frame_alias_set ());
19471 reg
= gen_rtx_REG (SImode
, j
);
19472 tmp
= gen_rtx_SET (reg
, tmp1
);
19473 RTX_FRAME_RELATED_P (tmp
) = 1;
19474 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19478 /* If return_in_pc, j must be PC_REGNUM. */
19479 gcc_assert (j
== PC_REGNUM
);
19480 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19481 XVECEXP (par
, 0, 0) = ret_rtx
;
19482 XVECEXP (par
, 0, 1) = tmp
;
19483 par
= emit_jump_insn (par
);
19487 par
= emit_insn (tmp
);
19488 REG_NOTES (par
) = dwarf
;
19489 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
19490 stack_pointer_rtx
, stack_pointer_rtx
);
19494 else if ((num_regs
% 2) == 1 && return_in_pc
)
19496 /* There are 2 registers to be popped. So, generate the pattern
19497 pop_multiple_with_stack_update_and_return to pop in PC. */
19498 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
19504 /* LDRD in ARM mode needs consecutive registers as operands. This function
19505 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
19506 offset addressing and then generates one separate stack udpate. This provides
19507 more scheduling freedom, compared to writeback on every load. However,
19508 if the function returns using load into PC directly
19509 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
19510 before the last load. TODO: Add a peephole optimization to recognize
19511 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
19512 peephole optimization to merge the load at stack-offset zero
19513 with the stack update instruction using load with writeback
19514 in post-index addressing mode. */
19516 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
19520 rtx par
= NULL_RTX
;
19521 rtx dwarf
= NULL_RTX
;
19524 /* Restore saved registers. */
19525 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
19527 while (j
<= LAST_ARM_REGNUM
)
19528 if (saved_regs_mask
& (1 << j
))
19531 && (saved_regs_mask
& (1 << (j
+ 1)))
19532 && (j
+ 1) != PC_REGNUM
)
19534 /* Current register and next register form register pair for which
19535 LDRD can be generated. PC is always the last register popped, and
19536 we handle it separately. */
19538 mem
= gen_frame_mem (DImode
,
19539 plus_constant (Pmode
,
19543 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19545 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
19546 tmp
= emit_insn (tmp
);
19547 RTX_FRAME_RELATED_P (tmp
) = 1;
19549 /* Generate dwarf info. */
19551 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
19552 gen_rtx_REG (SImode
, j
),
19554 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
19555 gen_rtx_REG (SImode
, j
+ 1),
19558 REG_NOTES (tmp
) = dwarf
;
19563 else if (j
!= PC_REGNUM
)
19565 /* Emit a single word load. */
19567 mem
= gen_frame_mem (SImode
,
19568 plus_constant (Pmode
,
19572 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19574 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
19575 tmp
= emit_insn (tmp
);
19576 RTX_FRAME_RELATED_P (tmp
) = 1;
19578 /* Generate dwarf info. */
19579 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
19580 gen_rtx_REG (SImode
, j
),
19586 else /* j == PC_REGNUM */
19592 /* Update the stack. */
19595 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19596 plus_constant (Pmode
,
19599 tmp
= emit_insn (tmp
);
19600 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
19601 stack_pointer_rtx
, stack_pointer_rtx
);
19605 if (saved_regs_mask
& (1 << PC_REGNUM
))
19607 /* Only PC is to be popped. */
19608 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19609 XVECEXP (par
, 0, 0) = ret_rtx
;
19610 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
19611 gen_frame_mem (SImode
,
19612 gen_rtx_POST_INC (SImode
,
19613 stack_pointer_rtx
)));
19614 RTX_FRAME_RELATED_P (tmp
) = 1;
19615 XVECEXP (par
, 0, 1) = tmp
;
19616 par
= emit_jump_insn (par
);
19618 /* Generate dwarf info. */
19619 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
19620 gen_rtx_REG (SImode
, PC_REGNUM
),
19622 REG_NOTES (par
) = dwarf
;
19623 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
19624 stack_pointer_rtx
, stack_pointer_rtx
);
19628 /* Calculate the size of the return value that is passed in registers. */
19630 arm_size_return_regs (void)
19634 if (crtl
->return_rtx
!= 0)
19635 mode
= GET_MODE (crtl
->return_rtx
);
19637 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
19639 return GET_MODE_SIZE (mode
);
19642 /* Return true if the current function needs to save/restore LR. */
19644 thumb_force_lr_save (void)
19646 return !cfun
->machine
->lr_save_eliminated
19647 && (!leaf_function_p ()
19648 || thumb_far_jump_used_p ()
19649 || df_regs_ever_live_p (LR_REGNUM
));
19652 /* We do not know if r3 will be available because
19653 we do have an indirect tailcall happening in this
19654 particular case. */
19656 is_indirect_tailcall_p (rtx call
)
19658 rtx pat
= PATTERN (call
);
19660 /* Indirect tail call. */
19661 pat
= XVECEXP (pat
, 0, 0);
19662 if (GET_CODE (pat
) == SET
)
19663 pat
= SET_SRC (pat
);
19665 pat
= XEXP (XEXP (pat
, 0), 0);
19666 return REG_P (pat
);
19669 /* Return true if r3 is used by any of the tail call insns in the
19670 current function. */
19672 any_sibcall_could_use_r3 (void)
19677 if (!crtl
->tail_call_emit
)
19679 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
19680 if (e
->flags
& EDGE_SIBCALL
)
19682 rtx_insn
*call
= BB_END (e
->src
);
19683 if (!CALL_P (call
))
19684 call
= prev_nonnote_nondebug_insn (call
);
19685 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
19686 if (find_regno_fusage (call
, USE
, 3)
19687 || is_indirect_tailcall_p (call
))
19694 /* Compute the distance from register FROM to register TO.
19695 These can be the arg pointer (26), the soft frame pointer (25),
19696 the stack pointer (13) or the hard frame pointer (11).
19697 In thumb mode r7 is used as the soft frame pointer, if needed.
19698 Typical stack layout looks like this:
19700 old stack pointer -> | |
19703 | | saved arguments for
19704 | | vararg functions
19707 hard FP & arg pointer -> | | \
19715 soft frame pointer -> | | /
19720 locals base pointer -> | | /
19725 current stack pointer -> | | /
19728 For a given function some or all of these stack components
19729 may not be needed, giving rise to the possibility of
19730 eliminating some of the registers.
19732 The values returned by this function must reflect the behavior
19733 of arm_expand_prologue() and arm_compute_save_reg_mask().
19735 The sign of the number returned reflects the direction of stack
19736 growth, so the values are positive for all eliminations except
19737 from the soft frame pointer to the hard frame pointer.
19739 SFP may point just inside the local variables block to ensure correct
19743 /* Calculate stack offsets. These are used to calculate register elimination
19744 offsets and in prologue/epilogue code. Also calculates which registers
19745 should be saved. */
19747 static arm_stack_offsets
*
19748 arm_get_frame_offsets (void)
19750 struct arm_stack_offsets
*offsets
;
19751 unsigned long func_type
;
19755 HOST_WIDE_INT frame_size
;
19758 offsets
= &cfun
->machine
->stack_offsets
;
19760 /* We need to know if we are a leaf function. Unfortunately, it
19761 is possible to be called after start_sequence has been called,
19762 which causes get_insns to return the insns for the sequence,
19763 not the function, which will cause leaf_function_p to return
19764 the incorrect result.
19766 to know about leaf functions once reload has completed, and the
19767 frame size cannot be changed after that time, so we can safely
19768 use the cached value. */
19770 if (reload_completed
)
19773 /* Initially this is the size of the local variables. It will translated
19774 into an offset once we have determined the size of preceding data. */
19775 frame_size
= ROUND_UP_WORD (get_frame_size ());
19777 leaf
= leaf_function_p ();
19779 /* Space for variadic functions. */
19780 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
19782 /* In Thumb mode this is incorrect, but never used. */
19784 = (offsets
->saved_args
19785 + arm_compute_static_chain_stack_bytes ()
19786 + (frame_pointer_needed
? 4 : 0));
19790 unsigned int regno
;
19792 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
19793 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
19794 saved
= core_saved
;
19796 /* We know that SP will be doubleword aligned on entry, and we must
19797 preserve that condition at any subroutine call. We also require the
19798 soft frame pointer to be doubleword aligned. */
19800 if (TARGET_REALLY_IWMMXT
)
19802 /* Check for the call-saved iWMMXt registers. */
19803 for (regno
= FIRST_IWMMXT_REGNUM
;
19804 regno
<= LAST_IWMMXT_REGNUM
;
19806 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
19810 func_type
= arm_current_func_type ();
19811 /* Space for saved VFP registers. */
19812 if (! IS_VOLATILE (func_type
)
19813 && TARGET_HARD_FLOAT
)
19814 saved
+= arm_get_vfp_saved_size ();
19816 else /* TARGET_THUMB1 */
19818 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
19819 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
19820 saved
= core_saved
;
19821 if (TARGET_BACKTRACE
)
19825 /* Saved registers include the stack frame. */
19826 offsets
->saved_regs
19827 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
19828 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
19830 /* A leaf function does not need any stack alignment if it has nothing
19832 if (leaf
&& frame_size
== 0
19833 /* However if it calls alloca(), we have a dynamically allocated
19834 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
19835 && ! cfun
->calls_alloca
)
19837 offsets
->outgoing_args
= offsets
->soft_frame
;
19838 offsets
->locals_base
= offsets
->soft_frame
;
19842 /* Ensure SFP has the correct alignment. */
19843 if (ARM_DOUBLEWORD_ALIGN
19844 && (offsets
->soft_frame
& 7))
19846 offsets
->soft_frame
+= 4;
19847 /* Try to align stack by pushing an extra reg. Don't bother doing this
19848 when there is a stack frame as the alignment will be rolled into
19849 the normal stack adjustment. */
19850 if (frame_size
+ crtl
->outgoing_args_size
== 0)
19854 /* Register r3 is caller-saved. Normally it does not need to be
19855 saved on entry by the prologue. However if we choose to save
19856 it for padding then we may confuse the compiler into thinking
19857 a prologue sequence is required when in fact it is not. This
19858 will occur when shrink-wrapping if r3 is used as a scratch
19859 register and there are no other callee-saved writes.
19861 This situation can be avoided when other callee-saved registers
19862 are available and r3 is not mandatory if we choose a callee-saved
19863 register for padding. */
19864 bool prefer_callee_reg_p
= false;
19866 /* If it is safe to use r3, then do so. This sometimes
19867 generates better code on Thumb-2 by avoiding the need to
19868 use 32-bit push/pop instructions. */
19869 if (! any_sibcall_could_use_r3 ()
19870 && arm_size_return_regs () <= 12
19871 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
19873 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
19876 if (!TARGET_THUMB2
)
19877 prefer_callee_reg_p
= true;
19880 || prefer_callee_reg_p
)
19882 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
19884 /* Avoid fixed registers; they may be changed at
19885 arbitrary times so it's unsafe to restore them
19886 during the epilogue. */
19888 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
19898 offsets
->saved_regs
+= 4;
19899 offsets
->saved_regs_mask
|= (1 << reg
);
19904 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
19905 offsets
->outgoing_args
= (offsets
->locals_base
19906 + crtl
->outgoing_args_size
);
19908 if (ARM_DOUBLEWORD_ALIGN
)
19910 /* Ensure SP remains doubleword aligned. */
19911 if (offsets
->outgoing_args
& 7)
19912 offsets
->outgoing_args
+= 4;
19913 gcc_assert (!(offsets
->outgoing_args
& 7));
19920 /* Calculate the relative offsets for the different stack pointers. Positive
19921 offsets are in the direction of stack growth. */
19924 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
19926 arm_stack_offsets
*offsets
;
19928 offsets
= arm_get_frame_offsets ();
19930 /* OK, now we have enough information to compute the distances.
19931 There must be an entry in these switch tables for each pair
19932 of registers in ELIMINABLE_REGS, even if some of the entries
19933 seem to be redundant or useless. */
19936 case ARG_POINTER_REGNUM
:
19939 case THUMB_HARD_FRAME_POINTER_REGNUM
:
19942 case FRAME_POINTER_REGNUM
:
19943 /* This is the reverse of the soft frame pointer
19944 to hard frame pointer elimination below. */
19945 return offsets
->soft_frame
- offsets
->saved_args
;
19947 case ARM_HARD_FRAME_POINTER_REGNUM
:
19948 /* This is only non-zero in the case where the static chain register
19949 is stored above the frame. */
19950 return offsets
->frame
- offsets
->saved_args
- 4;
19952 case STACK_POINTER_REGNUM
:
19953 /* If nothing has been pushed on the stack at all
19954 then this will return -4. This *is* correct! */
19955 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
19958 gcc_unreachable ();
19960 gcc_unreachable ();
19962 case FRAME_POINTER_REGNUM
:
19965 case THUMB_HARD_FRAME_POINTER_REGNUM
:
19968 case ARM_HARD_FRAME_POINTER_REGNUM
:
19969 /* The hard frame pointer points to the top entry in the
19970 stack frame. The soft frame pointer to the bottom entry
19971 in the stack frame. If there is no stack frame at all,
19972 then they are identical. */
19974 return offsets
->frame
- offsets
->soft_frame
;
19976 case STACK_POINTER_REGNUM
:
19977 return offsets
->outgoing_args
- offsets
->soft_frame
;
19980 gcc_unreachable ();
19982 gcc_unreachable ();
19985 /* You cannot eliminate from the stack pointer.
19986 In theory you could eliminate from the hard frame
19987 pointer to the stack pointer, but this will never
19988 happen, since if a stack frame is not needed the
19989 hard frame pointer will never be used. */
19990 gcc_unreachable ();
19994 /* Given FROM and TO register numbers, say whether this elimination is
19995 allowed. Frame pointer elimination is automatically handled.
19997 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
19998 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
19999 pointer, we must eliminate FRAME_POINTER_REGNUM into
20000 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20001 ARG_POINTER_REGNUM. */
20004 arm_can_eliminate (const int from
, const int to
)
20006 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20007 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20008 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20009 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20013 /* Emit RTL to save coprocessor registers on function entry. Returns the
20014 number of bytes pushed. */
20017 arm_save_coproc_regs(void)
20019 int saved_size
= 0;
20021 unsigned start_reg
;
20024 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20025 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20027 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20028 insn
= gen_rtx_MEM (V2SImode
, insn
);
20029 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20030 RTX_FRAME_RELATED_P (insn
) = 1;
20034 if (TARGET_HARD_FLOAT
)
20036 start_reg
= FIRST_VFP_REGNUM
;
20038 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20040 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20041 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20043 if (start_reg
!= reg
)
20044 saved_size
+= vfp_emit_fstmd (start_reg
,
20045 (reg
- start_reg
) / 2);
20046 start_reg
= reg
+ 2;
20049 if (start_reg
!= reg
)
20050 saved_size
+= vfp_emit_fstmd (start_reg
,
20051 (reg
- start_reg
) / 2);
20057 /* Set the Thumb frame pointer from the stack pointer. */
20060 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
20062 HOST_WIDE_INT amount
;
20065 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
20067 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20068 stack_pointer_rtx
, GEN_INT (amount
)));
20071 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
20072 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20073 expects the first two operands to be the same. */
20076 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20078 hard_frame_pointer_rtx
));
20082 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20083 hard_frame_pointer_rtx
,
20084 stack_pointer_rtx
));
20086 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
20087 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
20088 RTX_FRAME_RELATED_P (dwarf
) = 1;
20089 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20092 RTX_FRAME_RELATED_P (insn
) = 1;
20095 struct scratch_reg
{
20100 /* Return a short-lived scratch register for use as a 2nd scratch register on
20101 function entry after the registers are saved in the prologue. This register
20102 must be released by means of release_scratch_register_on_entry. IP is not
20103 considered since it is always used as the 1st scratch register if available.
20105 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
20106 mask of live registers. */
20109 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
20110 unsigned long live_regs
)
20116 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
20122 for (i
= 4; i
< 11; i
++)
20123 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
20131 /* If IP is used as the 1st scratch register for a nested function,
20132 then either r3 wasn't available or is used to preserve IP. */
20133 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
20135 regno
= (regno1
== 3 ? 2 : 3);
20137 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
20142 sr
->reg
= gen_rtx_REG (SImode
, regno
);
20145 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20146 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
20147 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
20148 plus_constant (Pmode
, stack_pointer_rtx
, -4));
20149 RTX_FRAME_RELATED_P (insn
) = 1;
20150 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
20154 /* Release a scratch register obtained from the preceding function. */
20157 release_scratch_register_on_entry (struct scratch_reg
*sr
)
20161 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
20162 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
20163 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
20164 plus_constant (Pmode
, stack_pointer_rtx
, 4));
20165 RTX_FRAME_RELATED_P (insn
) = 1;
20166 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif
20176 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
20177 inclusive. These are offsets from the current stack pointer. REGNO1
20178 is the index number of the 1st scratch register and LIVE_REGS is the
20179 mask of live registers. */
20182 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
20183 unsigned int regno1
, unsigned long live_regs
)
20185 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
20187 /* See if we have a constant small number of probes to generate. If so,
20188 that's the easy case. */
20189 if (size
<= PROBE_INTERVAL
)
20191 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
20192 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
20193 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
20196 /* The run-time loop is made up of 10 insns in the generic case while the
20197 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
20198 else if (size
<= 5 * PROBE_INTERVAL
)
20200 HOST_WIDE_INT i
, rem
;
20202 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
20203 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
20204 emit_stack_probe (reg1
);
20206 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
20207 it exceeds SIZE. If only two probes are needed, this will not
20208 generate any code. Then probe at FIRST + SIZE. */
20209 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
20211 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
20212 emit_stack_probe (reg1
);
20215 rem
= size
- (i
- PROBE_INTERVAL
);
20216 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
20218 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
20219 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
20222 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
20225 /* Otherwise, do the same as above, but in a loop. Note that we must be
20226 extra careful with variables wrapping around because we might be at
20227 the very top (or the very bottom) of the address space and we have
20228 to be able to handle this case properly; in particular, we use an
20229 equality test for the loop condition. */
20232 HOST_WIDE_INT rounded_size
;
20233 struct scratch_reg sr
;
20235 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
20237 emit_move_insn (reg1
, GEN_INT (first
));
20240 /* Step 1: round SIZE to the previous multiple of the interval. */
20242 rounded_size
= size
& -PROBE_INTERVAL
;
20243 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
20246 /* Step 2: compute initial and final value of the loop counter. */
20248 /* TEST_ADDR = SP + FIRST. */
20249 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
20251 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
20252 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
20255 /* Step 3: the loop
20259 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
20262 while (TEST_ADDR != LAST_ADDR)
20264 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
20265 until it is equal to ROUNDED_SIZE. */
20267 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
20270 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
20271 that SIZE is equal to ROUNDED_SIZE. */
20273 if (size
!= rounded_size
)
20275 HOST_WIDE_INT rem
= size
- rounded_size
;
20277 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
20279 emit_set_insn (sr
.reg
,
20280 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
20281 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
20282 PROBE_INTERVAL
- rem
));
20285 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
20288 release_scratch_register_on_entry (&sr
);
20291 /* Make sure nothing is scheduled before we are done. */
20292 emit_insn (gen_blockage ());
20295 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
20296 absolute addresses. */
20299 output_probe_stack_range (rtx reg1
, rtx reg2
)
20301 static int labelno
= 0;
20305 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
20308 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
20310 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
20312 xops
[1] = GEN_INT (PROBE_INTERVAL
);
20313 output_asm_insn ("sub\t%0, %0, %1", xops
);
20315 /* Probe at TEST_ADDR. */
20316 output_asm_insn ("str\tr0, [%0, #0]", xops
);
20318 /* Test if TEST_ADDR == LAST_ADDR. */
20320 output_asm_insn ("cmp\t%0, %1", xops
);
20323 fputs ("\tbne\t", asm_out_file
);
20324 assemble_name_raw (asm_out_file
, loop_lab
);
20325 fputc ('\n', asm_out_file
);
20330 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20333 arm_expand_prologue (void)
20338 unsigned long live_regs_mask
;
20339 unsigned long func_type
;
20341 int saved_pretend_args
= 0;
20342 int saved_regs
= 0;
20343 unsigned HOST_WIDE_INT args_to_push
;
20344 HOST_WIDE_INT size
;
20345 arm_stack_offsets
*offsets
;
20348 func_type
= arm_current_func_type ();
20350 /* Naked functions don't have prologues. */
20351 if (IS_NAKED (func_type
))
20353 if (flag_stack_usage_info
)
20354 current_function_static_stack_size
= 0;
20358 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20359 args_to_push
= crtl
->args
.pretend_args_size
;
20361 /* Compute which register we will have to save onto the stack. */
20362 offsets
= arm_get_frame_offsets ();
20363 live_regs_mask
= offsets
->saved_regs_mask
;
20365 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
20367 if (IS_STACKALIGN (func_type
))
20371 /* Handle a word-aligned stack pointer. We generate the following:
20376 <save and restore r0 in normal prologue/epilogue>
20380 The unwinder doesn't need to know about the stack realignment.
20381 Just tell it we saved SP in r0. */
20382 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
20384 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
20385 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
20387 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
20388 RTX_FRAME_RELATED_P (insn
) = 1;
20389 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
20391 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
20393 /* ??? The CFA changes here, which may cause GDB to conclude that it
20394 has entered a different function. That said, the unwind info is
20395 correct, individually, before and after this instruction because
20396 we've described the save of SP, which will override the default
20397 handling of SP as restoring from the CFA. */
20398 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
20401 /* The static chain register is the same as the IP register. If it is
20402 clobbered when creating the frame, we need to save and restore it. */
20403 clobber_ip
= IS_NESTED (func_type
)
20404 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20405 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
20406 && !df_regs_ever_live_p (LR_REGNUM
)
20407 && arm_r3_live_at_start_p ()));
20409 /* Find somewhere to store IP whilst the frame is being created.
20410 We try the following places in order:
20412 1. The last argument register r3 if it is available.
20413 2. A slot on the stack above the frame if there are no
20414 arguments to push onto the stack.
20415 3. Register r3 again, after pushing the argument registers
20416 onto the stack, if this is a varargs function.
20417 4. The last slot on the stack created for the arguments to
20418 push, if this isn't a varargs function.
20420 Note - we only need to tell the dwarf2 backend about the SP
20421 adjustment in the second variant; the static chain register
20422 doesn't need to be unwound, as it doesn't contain a value
20423 inherited from the caller. */
20426 if (!arm_r3_live_at_start_p ())
20427 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20428 else if (args_to_push
== 0)
20432 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20435 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20436 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
20439 /* Just tell the dwarf backend that we adjusted SP. */
20440 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
20441 plus_constant (Pmode
, stack_pointer_rtx
,
20443 RTX_FRAME_RELATED_P (insn
) = 1;
20444 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20448 /* Store the args on the stack. */
20449 if (cfun
->machine
->uses_anonymous_args
)
20451 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
20452 (0xf0 >> (args_to_push
/ 4)) & 0xf);
20453 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20454 saved_pretend_args
= 1;
20460 if (args_to_push
== 4)
20461 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20463 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
20464 plus_constant (Pmode
,
20468 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
20470 /* Just tell the dwarf backend that we adjusted SP. */
20471 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
20472 plus_constant (Pmode
, stack_pointer_rtx
,
20474 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20477 RTX_FRAME_RELATED_P (insn
) = 1;
20478 fp_offset
= args_to_push
;
20483 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20485 if (IS_INTERRUPT (func_type
))
20487 /* Interrupt functions must not corrupt any registers.
20488 Creating a frame pointer however, corrupts the IP
20489 register, so we must push it first. */
20490 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
20492 /* Do not set RTX_FRAME_RELATED_P on this insn.
20493 The dwarf stack unwinding code only wants to see one
20494 stack decrement per function, and this is not it. If
20495 this instruction is labeled as being part of the frame
20496 creation sequence then dwarf2out_frame_debug_expr will
20497 die when it encounters the assignment of IP to FP
20498 later on, since the use of SP here establishes SP as
20499 the CFA register and not IP.
20501 Anyway this instruction is not really part of the stack
20502 frame creation although it is part of the prologue. */
20505 insn
= emit_set_insn (ip_rtx
,
20506 plus_constant (Pmode
, stack_pointer_rtx
,
20508 RTX_FRAME_RELATED_P (insn
) = 1;
20513 /* Push the argument registers, or reserve space for them. */
20514 if (cfun
->machine
->uses_anonymous_args
)
20515 insn
= emit_multi_reg_push
20516 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
20517 (0xf0 >> (args_to_push
/ 4)) & 0xf);
20520 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20521 GEN_INT (- args_to_push
)));
20522 RTX_FRAME_RELATED_P (insn
) = 1;
20525 /* If this is an interrupt service routine, and the link register
20526 is going to be pushed, and we're not generating extra
20527 push of IP (needed when frame is needed and frame layout if apcs),
20528 subtracting four from LR now will mean that the function return
20529 can be done with a single instruction. */
20530 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
20531 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
20532 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
20535 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
20537 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
20540 if (live_regs_mask
)
20542 unsigned long dwarf_regs_mask
= live_regs_mask
;
20544 saved_regs
+= bit_count (live_regs_mask
) * 4;
20545 if (optimize_size
&& !frame_pointer_needed
20546 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
20548 /* If no coprocessor registers are being pushed and we don't have
20549 to worry about a frame pointer then push extra registers to
20550 create the stack frame. This is done in a way that does not
20551 alter the frame layout, so is independent of the epilogue. */
20555 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
20557 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
20558 if (frame
&& n
* 4 >= frame
)
20561 live_regs_mask
|= (1 << n
) - 1;
20562 saved_regs
+= frame
;
20567 && current_tune
->prefer_ldrd_strd
20568 && !optimize_function_for_size_p (cfun
))
20570 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
20572 thumb2_emit_strd_push (live_regs_mask
);
20573 else if (TARGET_ARM
20574 && !TARGET_APCS_FRAME
20575 && !IS_INTERRUPT (func_type
))
20576 arm_emit_strd_push (live_regs_mask
);
20579 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
20580 RTX_FRAME_RELATED_P (insn
) = 1;
20585 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
20586 RTX_FRAME_RELATED_P (insn
) = 1;
20590 if (! IS_VOLATILE (func_type
))
20591 saved_regs
+= arm_save_coproc_regs ();
20593 if (frame_pointer_needed
&& TARGET_ARM
)
20595 /* Create the new frame pointer. */
20596 if (TARGET_APCS_FRAME
)
20598 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
20599 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
20600 RTX_FRAME_RELATED_P (insn
) = 1;
20604 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
20605 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20606 stack_pointer_rtx
, insn
));
20607 RTX_FRAME_RELATED_P (insn
) = 1;
20611 size
= offsets
->outgoing_args
- offsets
->saved_args
;
20612 if (flag_stack_usage_info
)
20613 current_function_static_stack_size
= size
;
20615 /* If this isn't an interrupt service routine and we have a frame, then do
20616 stack checking. We use IP as the first scratch register, except for the
20617 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
20618 if (!IS_INTERRUPT (func_type
)
20619 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
20621 unsigned int regno
;
20623 if (!IS_NESTED (func_type
) || clobber_ip
)
20625 else if (df_regs_ever_live_p (LR_REGNUM
))
20630 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
20632 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
20633 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
20634 size
- STACK_CHECK_PROTECT
,
20635 regno
, live_regs_mask
);
20638 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
20639 regno
, live_regs_mask
);
20642 /* Recover the static chain register. */
20645 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
20646 insn
= gen_rtx_REG (SImode
, 3);
20649 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
20650 insn
= gen_frame_mem (SImode
, insn
);
20652 emit_set_insn (ip_rtx
, insn
);
20653 emit_insn (gen_force_register_use (ip_rtx
));
20656 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
20658 /* This add can produce multiple insns for a large constant, so we
20659 need to get tricky. */
20660 rtx_insn
*last
= get_last_insn ();
20662 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
20663 - offsets
->outgoing_args
);
20665 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20669 last
= last
? NEXT_INSN (last
) : get_insns ();
20670 RTX_FRAME_RELATED_P (last
) = 1;
20672 while (last
!= insn
);
20674 /* If the frame pointer is needed, emit a special barrier that
20675 will prevent the scheduler from moving stores to the frame
20676 before the stack adjustment. */
20677 if (frame_pointer_needed
)
20678 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
20679 hard_frame_pointer_rtx
));
20683 if (frame_pointer_needed
&& TARGET_THUMB2
)
20684 thumb_set_frame_pointer (offsets
);
20686 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20688 unsigned long mask
;
20690 mask
= live_regs_mask
;
20691 mask
&= THUMB2_WORK_REGS
;
20692 if (!IS_NESTED (func_type
))
20693 mask
|= (1 << IP_REGNUM
);
20694 arm_load_pic_register (mask
);
20697 /* If we are profiling, make sure no instructions are scheduled before
20698 the call to mcount. Similarly if the user has requested no
20699 scheduling in the prolog. Similarly if we want non-call exceptions
20700 using the EABI unwinder, to prevent faulting instructions from being
20701 swapped with a stack adjustment. */
20702 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
20703 || (arm_except_unwind_info (&global_options
) == UI_TARGET
20704 && cfun
->can_throw_non_call_exceptions
))
20705 emit_insn (gen_blockage ());
20707 /* If the link register is being kept alive, with the return address in it,
20708 then make sure that it does not get reused by the ce2 pass. */
20709 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
20710 cfun
->machine
->lr_save_eliminated
= 1;
20713 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20715 arm_print_condition (FILE *stream
)
20717 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
20719 /* Branch conversion is not implemented for Thumb-2. */
20722 output_operand_lossage ("predicated Thumb instruction");
20725 if (current_insn_predicate
!= NULL
)
20727 output_operand_lossage
20728 ("predicated instruction in conditional sequence");
20732 fputs (arm_condition_codes
[arm_current_cc
], stream
);
20734 else if (current_insn_predicate
)
20736 enum arm_cond_code code
;
20740 output_operand_lossage ("predicated Thumb instruction");
20744 code
= get_arm_condition_code (current_insn_predicate
);
20745 fputs (arm_condition_codes
[code
], stream
);
20750 /* Globally reserved letters: acln
20751 Punctuation letters currently used: @_|?().!#
20752 Lower case letters currently used: bcdefhimpqtvwxyz
20753 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
20754 Letters previously used, but now deprecated/obsolete: sVWXYZ.
20756 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
20758 If CODE is 'd', then the X is a condition operand and the instruction
20759 should only be executed if the condition is true.
20760 if CODE is 'D', then the X is a condition operand and the instruction
20761 should only be executed if the condition is false: however, if the mode
20762 of the comparison is CCFPEmode, then always execute the instruction -- we
20763 do this because in these circumstances !GE does not necessarily imply LT;
20764 in these cases the instruction pattern will take care to make sure that
20765 an instruction containing %d will follow, thereby undoing the effects of
20766 doing this instruction unconditionally.
20767 If CODE is 'N' then X is a floating point operand that must be negated
20769 If CODE is 'B' then output a bitwise inverted value of X (a const int).
20770 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
20772 arm_print_operand (FILE *stream
, rtx x
, int code
)
20777 fputs (ASM_COMMENT_START
, stream
);
20781 fputs (user_label_prefix
, stream
);
20785 fputs (REGISTER_PREFIX
, stream
);
20789 arm_print_condition (stream
);
20793 /* The current condition code for a condition code setting instruction.
20794 Preceded by 's' in unified syntax, otherwise followed by 's'. */
20795 fputc('s', stream
);
20796 arm_print_condition (stream
);
20800 /* If the instruction is conditionally executed then print
20801 the current condition code, otherwise print 's'. */
20802 gcc_assert (TARGET_THUMB2
);
20803 if (current_insn_predicate
)
20804 arm_print_condition (stream
);
20806 fputc('s', stream
);
20809 /* %# is a "break" sequence. It doesn't output anything, but is used to
20810 separate e.g. operand numbers from following text, if that text consists
20811 of further digits which we don't want to be part of the operand
20819 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
20820 fprintf (stream
, "%s", fp_const_from_val (&r
));
20824 /* An integer or symbol address without a preceding # sign. */
20826 switch (GET_CODE (x
))
20829 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
20833 output_addr_const (stream
, x
);
20837 if (GET_CODE (XEXP (x
, 0)) == PLUS
20838 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
20840 output_addr_const (stream
, x
);
20843 /* Fall through. */
20846 output_operand_lossage ("Unsupported operand for code '%c'", code
);
20850 /* An integer that we want to print in HEX. */
20852 switch (GET_CODE (x
))
20855 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
20859 output_operand_lossage ("Unsupported operand for code '%c'", code
);
20864 if (CONST_INT_P (x
))
20867 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
20868 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
20872 putc ('~', stream
);
20873 output_addr_const (stream
, x
);
20878 /* Print the log2 of a CONST_INT. */
20882 if (!CONST_INT_P (x
)
20883 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
20884 output_operand_lossage ("Unsupported operand for code '%c'", code
);
20886 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
20891 /* The low 16 bits of an immediate constant. */
20892 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
20896 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
20900 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
20908 shift
= shift_op (x
, &val
);
20912 fprintf (stream
, ", %s ", shift
);
20914 arm_print_operand (stream
, XEXP (x
, 1), 0);
20916 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
20921 /* An explanation of the 'Q', 'R' and 'H' register operands:
20923 In a pair of registers containing a DI or DF value the 'Q'
20924 operand returns the register number of the register containing
20925 the least significant part of the value. The 'R' operand returns
20926 the register number of the register containing the most
20927 significant part of the value.
20929 The 'H' operand returns the higher of the two register numbers.
20930 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
20931 same as the 'Q' operand, since the most significant part of the
20932 value is held in the lower number register. The reverse is true
20933 on systems where WORDS_BIG_ENDIAN is false.
20935 The purpose of these operands is to distinguish between cases
20936 where the endian-ness of the values is important (for example
20937 when they are added together), and cases where the endian-ness
20938 is irrelevant, but the order of register operations is important.
20939 For example when loading a value from memory into a register
20940 pair, the endian-ness does not matter. Provided that the value
20941 from the lower memory address is put into the lower numbered
20942 register, and the value from the higher address is put into the
20943 higher numbered register, the load will work regardless of whether
20944 the value being loaded is big-wordian or little-wordian. The
20945 order of the two register loads can matter however, if the address
20946 of the memory location is actually held in one of the registers
20947 being overwritten by the load.
20949 The 'Q' and 'R' constraints are also available for 64-bit
20952 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
20954 rtx part
= gen_lowpart (SImode
, x
);
20955 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
20959 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20961 output_operand_lossage ("invalid operand for code '%c'", code
);
20965 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
20969 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
20971 machine_mode mode
= GET_MODE (x
);
20974 if (mode
== VOIDmode
)
20976 part
= gen_highpart_mode (SImode
, mode
, x
);
20977 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
20981 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20983 output_operand_lossage ("invalid operand for code '%c'", code
);
20987 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
20991 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20993 output_operand_lossage ("invalid operand for code '%c'", code
);
20997 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21001 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21003 output_operand_lossage ("invalid operand for code '%c'", code
);
21007 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21011 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21013 output_operand_lossage ("invalid operand for code '%c'", code
);
21017 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21021 asm_fprintf (stream
, "%r",
21022 REG_P (XEXP (x
, 0))
21023 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21027 asm_fprintf (stream
, "{%r-%r}",
21029 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21032 /* Like 'M', but writing doubleword vector registers, for use by Neon
21036 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21037 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21039 asm_fprintf (stream
, "{d%d}", regno
);
21041 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21046 /* CONST_TRUE_RTX means always -- that's the default. */
21047 if (x
== const_true_rtx
)
21050 if (!COMPARISON_P (x
))
21052 output_operand_lossage ("invalid operand for code '%c'", code
);
21056 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21061 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21062 want to do that. */
21063 if (x
== const_true_rtx
)
21065 output_operand_lossage ("instruction never executed");
21068 if (!COMPARISON_P (x
))
21070 output_operand_lossage ("invalid operand for code '%c'", code
);
21074 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21075 (get_arm_condition_code (x
))],
21085 /* Former Maverick support, removed after GCC-4.7. */
21086 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21091 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21092 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21093 /* Bad value for wCG register number. */
21095 output_operand_lossage ("invalid operand for code '%c'", code
);
21100 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21103 /* Print an iWMMXt control register name. */
21105 if (!CONST_INT_P (x
)
21107 || INTVAL (x
) >= 16)
21108 /* Bad value for wC register number. */
21110 output_operand_lossage ("invalid operand for code '%c'", code
);
21116 static const char * wc_reg_names
[16] =
21118 "wCID", "wCon", "wCSSF", "wCASF",
21119 "wC4", "wC5", "wC6", "wC7",
21120 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21121 "wC12", "wC13", "wC14", "wC15"
21124 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21128 /* Print the high single-precision register of a VFP double-precision
21132 machine_mode mode
= GET_MODE (x
);
21135 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21137 output_operand_lossage ("invalid operand for code '%c'", code
);
21142 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21144 output_operand_lossage ("invalid operand for code '%c'", code
);
21148 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21152 /* Print a VFP/Neon double precision or quad precision register name. */
21156 machine_mode mode
= GET_MODE (x
);
21157 int is_quad
= (code
== 'q');
21160 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21162 output_operand_lossage ("invalid operand for code '%c'", code
);
21167 || !IS_VFP_REGNUM (REGNO (x
)))
21169 output_operand_lossage ("invalid operand for code '%c'", code
);
21174 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21175 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21177 output_operand_lossage ("invalid operand for code '%c'", code
);
21181 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21182 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21186 /* These two codes print the low/high doubleword register of a Neon quad
21187 register, respectively. For pair-structure types, can also print
21188 low/high quadword registers. */
21192 machine_mode mode
= GET_MODE (x
);
21195 if ((GET_MODE_SIZE (mode
) != 16
21196 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21198 output_operand_lossage ("invalid operand for code '%c'", code
);
21203 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21205 output_operand_lossage ("invalid operand for code '%c'", code
);
21209 if (GET_MODE_SIZE (mode
) == 16)
21210 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21211 + (code
== 'f' ? 1 : 0));
21213 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21214 + (code
== 'f' ? 1 : 0));
21218 /* Print a VFPv3 floating-point constant, represented as an integer
21222 int index
= vfp3_const_double_index (x
);
21223 gcc_assert (index
!= -1);
21224 fprintf (stream
, "%d", index
);
21228 /* Print bits representing opcode features for Neon.
21230 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21231 and polynomials as unsigned.
21233 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21235 Bit 2 is 1 for rounding functions, 0 otherwise. */
21237 /* Identify the type as 's', 'u', 'p' or 'f'. */
21240 HOST_WIDE_INT bits
= INTVAL (x
);
21241 fputc ("uspf"[bits
& 3], stream
);
21245 /* Likewise, but signed and unsigned integers are both 'i'. */
21248 HOST_WIDE_INT bits
= INTVAL (x
);
21249 fputc ("iipf"[bits
& 3], stream
);
21253 /* As for 'T', but emit 'u' instead of 'p'. */
21256 HOST_WIDE_INT bits
= INTVAL (x
);
21257 fputc ("usuf"[bits
& 3], stream
);
21261 /* Bit 2: rounding (vs none). */
21264 HOST_WIDE_INT bits
= INTVAL (x
);
21265 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21269 /* Memory operand for vld1/vst1 instruction. */
21273 bool postinc
= FALSE
;
21274 rtx postinc_reg
= NULL
;
21275 unsigned align
, memsize
, align_bits
;
21277 gcc_assert (MEM_P (x
));
21278 addr
= XEXP (x
, 0);
21279 if (GET_CODE (addr
) == POST_INC
)
21282 addr
= XEXP (addr
, 0);
21284 if (GET_CODE (addr
) == POST_MODIFY
)
21286 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
21287 addr
= XEXP (addr
, 0);
21289 asm_fprintf (stream
, "[%r", REGNO (addr
));
21291 /* We know the alignment of this access, so we can emit a hint in the
21292 instruction (for some alignments) as an aid to the memory subsystem
21294 align
= MEM_ALIGN (x
) >> 3;
21295 memsize
= MEM_SIZE (x
);
21297 /* Only certain alignment specifiers are supported by the hardware. */
21298 if (memsize
== 32 && (align
% 32) == 0)
21300 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
21302 else if (memsize
>= 8 && (align
% 8) == 0)
21307 if (align_bits
!= 0)
21308 asm_fprintf (stream
, ":%d", align_bits
);
21310 asm_fprintf (stream
, "]");
21313 fputs("!", stream
);
21315 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
21323 gcc_assert (MEM_P (x
));
21324 addr
= XEXP (x
, 0);
21325 gcc_assert (REG_P (addr
));
21326 asm_fprintf (stream
, "[%r]", REGNO (addr
));
21330 /* Translate an S register number into a D register number and element index. */
21333 machine_mode mode
= GET_MODE (x
);
21336 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
21338 output_operand_lossage ("invalid operand for code '%c'", code
);
21343 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21345 output_operand_lossage ("invalid operand for code '%c'", code
);
21349 regno
= regno
- FIRST_VFP_REGNUM
;
21350 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
21355 gcc_assert (CONST_DOUBLE_P (x
));
21357 result
= vfp3_const_double_for_fract_bits (x
);
21359 result
= vfp3_const_double_for_bits (x
);
21360 fprintf (stream
, "#%d", result
);
21363 /* Register specifier for vld1.16/vst1.16. Translate the S register
21364 number into a D register number and element index. */
21367 machine_mode mode
= GET_MODE (x
);
21370 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
21372 output_operand_lossage ("invalid operand for code '%c'", code
);
21377 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21379 output_operand_lossage ("invalid operand for code '%c'", code
);
21383 regno
= regno
- FIRST_VFP_REGNUM
;
21384 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
21391 output_operand_lossage ("missing operand");
21395 switch (GET_CODE (x
))
21398 asm_fprintf (stream
, "%r", REGNO (x
));
21402 output_address (GET_MODE (x
), XEXP (x
, 0));
21408 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
21409 sizeof (fpstr
), 0, 1);
21410 fprintf (stream
, "#%s", fpstr
);
21415 gcc_assert (GET_CODE (x
) != NEG
);
21416 fputc ('#', stream
);
21417 if (GET_CODE (x
) == HIGH
)
21419 fputs (":lower16:", stream
);
21423 output_addr_const (stream
, x
);
21429 /* Target hook for printing a memory address. */
21431 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
21435 int is_minus
= GET_CODE (x
) == MINUS
;
21438 asm_fprintf (stream
, "[%r]", REGNO (x
));
21439 else if (GET_CODE (x
) == PLUS
|| is_minus
)
21441 rtx base
= XEXP (x
, 0);
21442 rtx index
= XEXP (x
, 1);
21443 HOST_WIDE_INT offset
= 0;
21445 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
21447 /* Ensure that BASE is a register. */
21448 /* (one of them must be). */
21449 /* Also ensure the SP is not used as an index register. */
21450 std::swap (base
, index
);
21452 switch (GET_CODE (index
))
21455 offset
= INTVAL (index
);
21458 asm_fprintf (stream
, "[%r, #%wd]",
21459 REGNO (base
), offset
);
21463 asm_fprintf (stream
, "[%r, %s%r]",
21464 REGNO (base
), is_minus
? "-" : "",
21474 asm_fprintf (stream
, "[%r, %s%r",
21475 REGNO (base
), is_minus
? "-" : "",
21476 REGNO (XEXP (index
, 0)));
21477 arm_print_operand (stream
, index
, 'S');
21478 fputs ("]", stream
);
21483 gcc_unreachable ();
21486 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
21487 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
21489 gcc_assert (REG_P (XEXP (x
, 0)));
21491 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
21492 asm_fprintf (stream
, "[%r, #%s%d]!",
21493 REGNO (XEXP (x
, 0)),
21494 GET_CODE (x
) == PRE_DEC
? "-" : "",
21495 GET_MODE_SIZE (mode
));
21497 asm_fprintf (stream
, "[%r], #%s%d",
21498 REGNO (XEXP (x
, 0)),
21499 GET_CODE (x
) == POST_DEC
? "-" : "",
21500 GET_MODE_SIZE (mode
));
21502 else if (GET_CODE (x
) == PRE_MODIFY
)
21504 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
21505 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21506 asm_fprintf (stream
, "#%wd]!",
21507 INTVAL (XEXP (XEXP (x
, 1), 1)));
21509 asm_fprintf (stream
, "%r]!",
21510 REGNO (XEXP (XEXP (x
, 1), 1)));
21512 else if (GET_CODE (x
) == POST_MODIFY
)
21514 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
21515 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21516 asm_fprintf (stream
, "#%wd",
21517 INTVAL (XEXP (XEXP (x
, 1), 1)));
21519 asm_fprintf (stream
, "%r",
21520 REGNO (XEXP (XEXP (x
, 1), 1)));
21522 else output_addr_const (stream
, x
);
21527 asm_fprintf (stream
, "[%r]", REGNO (x
));
21528 else if (GET_CODE (x
) == POST_INC
)
21529 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
21530 else if (GET_CODE (x
) == PLUS
)
21532 gcc_assert (REG_P (XEXP (x
, 0)));
21533 if (CONST_INT_P (XEXP (x
, 1)))
21534 asm_fprintf (stream
, "[%r, #%wd]",
21535 REGNO (XEXP (x
, 0)),
21536 INTVAL (XEXP (x
, 1)));
21538 asm_fprintf (stream
, "[%r, %r]",
21539 REGNO (XEXP (x
, 0)),
21540 REGNO (XEXP (x
, 1)));
21543 output_addr_const (stream
, x
);
21547 /* Target hook for indicating whether a punctuation character for
21548 TARGET_PRINT_OPERAND is valid. */
21550 arm_print_operand_punct_valid_p (unsigned char code
)
21552 return (code
== '@' || code
== '|' || code
== '.'
21553 || code
== '(' || code
== ')' || code
== '#'
21554 || (TARGET_32BIT
&& (code
== '?'))
21555 || (TARGET_THUMB2
&& (code
== '!'))
21556 || (TARGET_THUMB
&& (code
== '_')));
21559 /* Target hook for assembling integer objects. The ARM version needs to
21560 handle word-sized values specially. */
21562 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
21566 if (size
== UNITS_PER_WORD
&& aligned_p
)
21568 fputs ("\t.word\t", asm_out_file
);
21569 output_addr_const (asm_out_file
, x
);
21571 /* Mark symbols as position independent. We only do this in the
21572 .text segment, not in the .data segment. */
21573 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
21574 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
21576 /* See legitimize_pic_address for an explanation of the
21577 TARGET_VXWORKS_RTP check. */
21578 if (!arm_pic_data_is_text_relative
21579 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
21580 fputs ("(GOT)", asm_out_file
);
21582 fputs ("(GOTOFF)", asm_out_file
);
21584 fputc ('\n', asm_out_file
);
21588 mode
= GET_MODE (x
);
21590 if (arm_vector_mode_supported_p (mode
))
21594 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
21596 units
= CONST_VECTOR_NUNITS (x
);
21597 size
= GET_MODE_UNIT_SIZE (mode
);
21599 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
21600 for (i
= 0; i
< units
; i
++)
21602 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21604 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
21607 for (i
= 0; i
< units
; i
++)
21609 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21611 (*CONST_DOUBLE_REAL_VALUE (elt
), GET_MODE_INNER (mode
),
21612 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
21618 return default_assemble_integer (x
, size
, aligned_p
);
21622 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
21626 if (!TARGET_AAPCS_BASED
)
21629 default_named_section_asm_out_constructor
21630 : default_named_section_asm_out_destructor
) (symbol
, priority
);
21634 /* Put these in the .init_array section, using a special relocation. */
21635 if (priority
!= DEFAULT_INIT_PRIORITY
)
21638 sprintf (buf
, "%s.%.5u",
21639 is_ctor
? ".init_array" : ".fini_array",
21641 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
21648 switch_to_section (s
);
21649 assemble_align (POINTER_SIZE
);
21650 fputs ("\t.word\t", asm_out_file
);
21651 output_addr_const (asm_out_file
, symbol
);
21652 fputs ("(target1)\n", asm_out_file
);
21655 /* Add a function to the list of static constructors. */
21658 arm_elf_asm_constructor (rtx symbol
, int priority
)
21660 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
21663 /* Add a function to the list of static destructors. */
21666 arm_elf_asm_destructor (rtx symbol
, int priority
)
21668 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
21671 /* A finite state machine takes care of noticing whether or not instructions
21672 can be conditionally executed, and thus decrease execution time and code
21673 size by deleting branch instructions. The fsm is controlled by
21674 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21676 /* The state of the fsm controlling condition codes are:
21677 0: normal, do nothing special
21678 1: make ASM_OUTPUT_OPCODE not output this instruction
21679 2: make ASM_OUTPUT_OPCODE not output this instruction
21680 3: make instructions conditional
21681 4: make instructions conditional
21683 State transitions (state->state by whom under condition):
21684 0 -> 1 final_prescan_insn if the `target' is a label
21685 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21686 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21687 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21688 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21689 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21690 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21691 (the target insn is arm_target_insn).
21693 If the jump clobbers the conditions then we use states 2 and 4.
21695 A similar thing can be done with conditional return insns.
21697 XXX In case the `target' is an unconditional branch, this conditionalising
21698 of the instructions always reduces code size, but not always execution
21699 time. But then, I want to reduce the code size to somewhere near what
21700 /bin/cc produces. */
21702 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21703 instructions. When a COND_EXEC instruction is seen the subsequent
21704 instructions are scanned so that multiple conditional instructions can be
21705 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21706 specify the length and true/false mask for the IT block. These will be
21707 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
21709 /* Returns the index of the ARM condition code string in
21710 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21711 COMPARISON should be an rtx like `(eq (...) (...))'. */
21714 maybe_get_arm_condition_code (rtx comparison
)
21716 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
21717 enum arm_cond_code code
;
21718 enum rtx_code comp_code
= GET_CODE (comparison
);
21720 if (GET_MODE_CLASS (mode
) != MODE_CC
)
21721 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
21722 XEXP (comparison
, 1));
21726 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
21727 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
21728 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
21729 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
21730 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
21731 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
21732 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
21733 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
21734 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
21735 case CC_DLTUmode
: code
= ARM_CC
;
21738 if (comp_code
== EQ
)
21739 return ARM_INVERSE_CONDITION_CODE (code
);
21740 if (comp_code
== NE
)
21747 case NE
: return ARM_NE
;
21748 case EQ
: return ARM_EQ
;
21749 case GE
: return ARM_PL
;
21750 case LT
: return ARM_MI
;
21751 default: return ARM_NV
;
21757 case NE
: return ARM_NE
;
21758 case EQ
: return ARM_EQ
;
21759 default: return ARM_NV
;
21765 case NE
: return ARM_MI
;
21766 case EQ
: return ARM_PL
;
21767 default: return ARM_NV
;
21772 /* We can handle all cases except UNEQ and LTGT. */
21775 case GE
: return ARM_GE
;
21776 case GT
: return ARM_GT
;
21777 case LE
: return ARM_LS
;
21778 case LT
: return ARM_MI
;
21779 case NE
: return ARM_NE
;
21780 case EQ
: return ARM_EQ
;
21781 case ORDERED
: return ARM_VC
;
21782 case UNORDERED
: return ARM_VS
;
21783 case UNLT
: return ARM_LT
;
21784 case UNLE
: return ARM_LE
;
21785 case UNGT
: return ARM_HI
;
21786 case UNGE
: return ARM_PL
;
21787 /* UNEQ and LTGT do not have a representation. */
21788 case UNEQ
: /* Fall through. */
21789 case LTGT
: /* Fall through. */
21790 default: return ARM_NV
;
21796 case NE
: return ARM_NE
;
21797 case EQ
: return ARM_EQ
;
21798 case GE
: return ARM_LE
;
21799 case GT
: return ARM_LT
;
21800 case LE
: return ARM_GE
;
21801 case LT
: return ARM_GT
;
21802 case GEU
: return ARM_LS
;
21803 case GTU
: return ARM_CC
;
21804 case LEU
: return ARM_CS
;
21805 case LTU
: return ARM_HI
;
21806 default: return ARM_NV
;
21812 case LTU
: return ARM_CS
;
21813 case GEU
: return ARM_CC
;
21814 case NE
: return ARM_CS
;
21815 case EQ
: return ARM_CC
;
21816 default: return ARM_NV
;
21822 case NE
: return ARM_NE
;
21823 case EQ
: return ARM_EQ
;
21824 case GEU
: return ARM_CS
;
21825 case GTU
: return ARM_HI
;
21826 case LEU
: return ARM_LS
;
21827 case LTU
: return ARM_CC
;
21828 default: return ARM_NV
;
21834 case GE
: return ARM_GE
;
21835 case LT
: return ARM_LT
;
21836 case GEU
: return ARM_CS
;
21837 case LTU
: return ARM_CC
;
21838 default: return ARM_NV
;
21844 case NE
: return ARM_VS
;
21845 case EQ
: return ARM_VC
;
21846 default: return ARM_NV
;
21852 case NE
: return ARM_NE
;
21853 case EQ
: return ARM_EQ
;
21854 case GE
: return ARM_GE
;
21855 case GT
: return ARM_GT
;
21856 case LE
: return ARM_LE
;
21857 case LT
: return ARM_LT
;
21858 case GEU
: return ARM_CS
;
21859 case GTU
: return ARM_HI
;
21860 case LEU
: return ARM_LS
;
21861 case LTU
: return ARM_CC
;
21862 default: return ARM_NV
;
21865 default: gcc_unreachable ();
21869 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
21870 static enum arm_cond_code
21871 get_arm_condition_code (rtx comparison
)
21873 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
21874 gcc_assert (code
!= ARM_NV
);
21878 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
21881 thumb2_final_prescan_insn (rtx_insn
*insn
)
21883 rtx_insn
*first_insn
= insn
;
21884 rtx body
= PATTERN (insn
);
21886 enum arm_cond_code code
;
21891 /* max_insns_skipped in the tune was already taken into account in the
21892 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
21893 just emit the IT blocks as we can. It does not make sense to split
21895 max
= MAX_INSN_PER_IT_BLOCK
;
21897 /* Remove the previous insn from the count of insns to be output. */
21898 if (arm_condexec_count
)
21899 arm_condexec_count
--;
21901 /* Nothing to do if we are already inside a conditional block. */
21902 if (arm_condexec_count
)
21905 if (GET_CODE (body
) != COND_EXEC
)
21908 /* Conditional jumps are implemented directly. */
21912 predicate
= COND_EXEC_TEST (body
);
21913 arm_current_cc
= get_arm_condition_code (predicate
);
21915 n
= get_attr_ce_count (insn
);
21916 arm_condexec_count
= 1;
21917 arm_condexec_mask
= (1 << n
) - 1;
21918 arm_condexec_masklen
= n
;
21919 /* See if subsequent instructions can be combined into the same block. */
21922 insn
= next_nonnote_insn (insn
);
21924 /* Jumping into the middle of an IT block is illegal, so a label or
21925 barrier terminates the block. */
21926 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
21929 body
= PATTERN (insn
);
21930 /* USE and CLOBBER aren't really insns, so just skip them. */
21931 if (GET_CODE (body
) == USE
21932 || GET_CODE (body
) == CLOBBER
)
21935 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
21936 if (GET_CODE (body
) != COND_EXEC
)
21938 /* Maximum number of conditionally executed instructions in a block. */
21939 n
= get_attr_ce_count (insn
);
21940 if (arm_condexec_masklen
+ n
> max
)
21943 predicate
= COND_EXEC_TEST (body
);
21944 code
= get_arm_condition_code (predicate
);
21945 mask
= (1 << n
) - 1;
21946 if (arm_current_cc
== code
)
21947 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
21948 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
21951 arm_condexec_count
++;
21952 arm_condexec_masklen
+= n
;
21954 /* A jump must be the last instruction in a conditional block. */
21958 /* Restore recog_data (getting the attributes of other insns can
21959 destroy this array, but final.c assumes that it remains intact
21960 across this call). */
21961 extract_constrain_insn_cached (first_insn
);
21965 arm_final_prescan_insn (rtx_insn
*insn
)
21967 /* BODY will hold the body of INSN. */
21968 rtx body
= PATTERN (insn
);
21970 /* This will be 1 if trying to repeat the trick, and things need to be
21971 reversed if it appears to fail. */
21974 /* If we start with a return insn, we only succeed if we find another one. */
21975 int seeking_return
= 0;
21976 enum rtx_code return_code
= UNKNOWN
;
21978 /* START_INSN will hold the insn from where we start looking. This is the
21979 first insn after the following code_label if REVERSE is true. */
21980 rtx_insn
*start_insn
= insn
;
21982 /* If in state 4, check if the target branch is reached, in order to
21983 change back to state 0. */
21984 if (arm_ccfsm_state
== 4)
21986 if (insn
== arm_target_insn
)
21988 arm_target_insn
= NULL
;
21989 arm_ccfsm_state
= 0;
21994 /* If in state 3, it is possible to repeat the trick, if this insn is an
21995 unconditional branch to a label, and immediately following this branch
21996 is the previous target label which is only used once, and the label this
21997 branch jumps to is not too far off. */
21998 if (arm_ccfsm_state
== 3)
22000 if (simplejump_p (insn
))
22002 start_insn
= next_nonnote_insn (start_insn
);
22003 if (BARRIER_P (start_insn
))
22005 /* XXX Isn't this always a barrier? */
22006 start_insn
= next_nonnote_insn (start_insn
);
22008 if (LABEL_P (start_insn
)
22009 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22010 && LABEL_NUSES (start_insn
) == 1)
22015 else if (ANY_RETURN_P (body
))
22017 start_insn
= next_nonnote_insn (start_insn
);
22018 if (BARRIER_P (start_insn
))
22019 start_insn
= next_nonnote_insn (start_insn
);
22020 if (LABEL_P (start_insn
)
22021 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22022 && LABEL_NUSES (start_insn
) == 1)
22025 seeking_return
= 1;
22026 return_code
= GET_CODE (body
);
22035 gcc_assert (!arm_ccfsm_state
|| reverse
);
22036 if (!JUMP_P (insn
))
22039 /* This jump might be paralleled with a clobber of the condition codes
22040 the jump should always come first */
22041 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22042 body
= XVECEXP (body
, 0, 0);
22045 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22046 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22049 int fail
= FALSE
, succeed
= FALSE
;
22050 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22051 int then_not_else
= TRUE
;
22052 rtx_insn
*this_insn
= start_insn
;
22055 /* Register the insn jumped to. */
22058 if (!seeking_return
)
22059 label
= XEXP (SET_SRC (body
), 0);
22061 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22062 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22063 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22065 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22066 then_not_else
= FALSE
;
22068 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22070 seeking_return
= 1;
22071 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22073 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22075 seeking_return
= 1;
22076 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22077 then_not_else
= FALSE
;
22080 gcc_unreachable ();
22082 /* See how many insns this branch skips, and what kind of insns. If all
22083 insns are okay, and the label or unconditional branch to the same
22084 label is not too far away, succeed. */
22085 for (insns_skipped
= 0;
22086 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22090 this_insn
= next_nonnote_insn (this_insn
);
22094 switch (GET_CODE (this_insn
))
22097 /* Succeed if it is the target label, otherwise fail since
22098 control falls in from somewhere else. */
22099 if (this_insn
== label
)
22101 arm_ccfsm_state
= 1;
22109 /* Succeed if the following insn is the target label.
22111 If return insns are used then the last insn in a function
22112 will be a barrier. */
22113 this_insn
= next_nonnote_insn (this_insn
);
22114 if (this_insn
&& this_insn
== label
)
22116 arm_ccfsm_state
= 1;
22124 /* The AAPCS says that conditional calls should not be
22125 used since they make interworking inefficient (the
22126 linker can't transform BL<cond> into BLX). That's
22127 only a problem if the machine has BLX. */
22134 /* Succeed if the following insn is the target label, or
22135 if the following two insns are a barrier and the
22137 this_insn
= next_nonnote_insn (this_insn
);
22138 if (this_insn
&& BARRIER_P (this_insn
))
22139 this_insn
= next_nonnote_insn (this_insn
);
22141 if (this_insn
&& this_insn
== label
22142 && insns_skipped
< max_insns_skipped
)
22144 arm_ccfsm_state
= 1;
22152 /* If this is an unconditional branch to the same label, succeed.
22153 If it is to another label, do nothing. If it is conditional,
22155 /* XXX Probably, the tests for SET and the PC are
22158 scanbody
= PATTERN (this_insn
);
22159 if (GET_CODE (scanbody
) == SET
22160 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22162 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22163 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22165 arm_ccfsm_state
= 2;
22168 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22171 /* Fail if a conditional return is undesirable (e.g. on a
22172 StrongARM), but still allow this if optimizing for size. */
22173 else if (GET_CODE (scanbody
) == return_code
22174 && !use_return_insn (TRUE
, NULL
)
22177 else if (GET_CODE (scanbody
) == return_code
)
22179 arm_ccfsm_state
= 2;
22182 else if (GET_CODE (scanbody
) == PARALLEL
)
22184 switch (get_attr_conds (this_insn
))
22194 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22199 /* Instructions using or affecting the condition codes make it
22201 scanbody
= PATTERN (this_insn
);
22202 if (!(GET_CODE (scanbody
) == SET
22203 || GET_CODE (scanbody
) == PARALLEL
)
22204 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
22214 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22215 arm_target_label
= CODE_LABEL_NUMBER (label
);
22218 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22220 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22222 this_insn
= next_nonnote_insn (this_insn
);
22223 gcc_assert (!this_insn
22224 || (!BARRIER_P (this_insn
)
22225 && !LABEL_P (this_insn
)));
22229 /* Oh, dear! we ran off the end.. give up. */
22230 extract_constrain_insn_cached (insn
);
22231 arm_ccfsm_state
= 0;
22232 arm_target_insn
= NULL
;
22235 arm_target_insn
= this_insn
;
22238 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22241 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22243 if (reverse
|| then_not_else
)
22244 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
22247 /* Restore recog_data (getting the attributes of other insns can
22248 destroy this array, but final.c assumes that it remains intact
22249 across this call. */
22250 extract_constrain_insn_cached (insn
);
22254 /* Output IT instructions. */
22256 thumb2_asm_output_opcode (FILE * stream
)
22261 if (arm_condexec_mask
)
22263 for (n
= 0; n
< arm_condexec_masklen
; n
++)
22264 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
22266 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
22267 arm_condition_codes
[arm_current_cc
]);
22268 arm_condexec_mask
= 0;
22272 /* Returns true if REGNO is a valid register
22273 for holding a quantity of type MODE. */
22275 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
22277 if (GET_MODE_CLASS (mode
) == MODE_CC
)
22278 return (regno
== CC_REGNUM
22279 || (TARGET_HARD_FLOAT
22280 && regno
== VFPCC_REGNUM
));
22282 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
22286 /* For the Thumb we only allow values bigger than SImode in
22287 registers 0 - 6, so that there is always a second low
22288 register available to hold the upper part of the value.
22289 We probably we ought to ensure that the register is the
22290 start of an even numbered register pair. */
22291 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
22293 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
22295 if (mode
== SFmode
|| mode
== SImode
)
22296 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22298 if (mode
== DFmode
)
22299 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
22301 if (mode
== HFmode
)
22302 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22304 /* VFP registers can hold HImode values. */
22305 if (mode
== HImode
)
22306 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22309 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
22310 || (VALID_NEON_QREG_MODE (mode
)
22311 && NEON_REGNO_OK_FOR_QUAD (regno
))
22312 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
22313 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
22314 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
22315 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
22316 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
22321 if (TARGET_REALLY_IWMMXT
)
22323 if (IS_IWMMXT_GR_REGNUM (regno
))
22324 return mode
== SImode
;
22326 if (IS_IWMMXT_REGNUM (regno
))
22327 return VALID_IWMMXT_REG_MODE (mode
);
22330 /* We allow almost any value to be stored in the general registers.
22331 Restrict doubleword quantities to even register pairs in ARM state
22332 so that we can use ldrd. Do not allow very large Neon structure
22333 opaque modes in general registers; they would use too many. */
22334 if (regno
<= LAST_ARM_REGNUM
)
22336 if (ARM_NUM_REGS (mode
) > 4)
22342 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
22345 if (regno
== FRAME_POINTER_REGNUM
22346 || regno
== ARG_POINTER_REGNUM
)
22347 /* We only allow integers in the fake hard registers. */
22348 return GET_MODE_CLASS (mode
) == MODE_INT
;
22353 /* Implement MODES_TIEABLE_P. */
22356 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
22358 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
22361 /* We specifically want to allow elements of "structure" modes to
22362 be tieable to the structure. This more general condition allows
22363 other rarer situations too. */
22365 && (VALID_NEON_DREG_MODE (mode1
)
22366 || VALID_NEON_QREG_MODE (mode1
)
22367 || VALID_NEON_STRUCT_MODE (mode1
))
22368 && (VALID_NEON_DREG_MODE (mode2
)
22369 || VALID_NEON_QREG_MODE (mode2
)
22370 || VALID_NEON_STRUCT_MODE (mode2
)))
22376 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22377 not used in arm mode. */
22380 arm_regno_class (int regno
)
22382 if (regno
== PC_REGNUM
)
22387 if (regno
== STACK_POINTER_REGNUM
)
22389 if (regno
== CC_REGNUM
)
22396 if (TARGET_THUMB2
&& regno
< 8)
22399 if ( regno
<= LAST_ARM_REGNUM
22400 || regno
== FRAME_POINTER_REGNUM
22401 || regno
== ARG_POINTER_REGNUM
)
22402 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
22404 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
22405 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
22407 if (IS_VFP_REGNUM (regno
))
22409 if (regno
<= D7_VFP_REGNUM
)
22410 return VFP_D0_D7_REGS
;
22411 else if (regno
<= LAST_LO_VFP_REGNUM
)
22412 return VFP_LO_REGS
;
22414 return VFP_HI_REGS
;
22417 if (IS_IWMMXT_REGNUM (regno
))
22418 return IWMMXT_REGS
;
22420 if (IS_IWMMXT_GR_REGNUM (regno
))
22421 return IWMMXT_GR_REGS
;
22426 /* Handle a special case when computing the offset
22427 of an argument from the frame pointer. */
22429 arm_debugger_arg_offset (int value
, rtx addr
)
22433 /* We are only interested if dbxout_parms() failed to compute the offset. */
22437 /* We can only cope with the case where the address is held in a register. */
22441 /* If we are using the frame pointer to point at the argument, then
22442 an offset of 0 is correct. */
22443 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
22446 /* If we are using the stack pointer to point at the
22447 argument, then an offset of 0 is correct. */
22448 /* ??? Check this is consistent with thumb2 frame layout. */
22449 if ((TARGET_THUMB
|| !frame_pointer_needed
)
22450 && REGNO (addr
) == SP_REGNUM
)
22453 /* Oh dear. The argument is pointed to by a register rather
22454 than being held in a register, or being stored at a known
22455 offset from the frame pointer. Since GDB only understands
22456 those two kinds of argument we must translate the address
22457 held in the register into an offset from the frame pointer.
22458 We do this by searching through the insns for the function
22459 looking to see where this register gets its value. If the
22460 register is initialized from the frame pointer plus an offset
22461 then we are in luck and we can continue, otherwise we give up.
22463 This code is exercised by producing debugging information
22464 for a function with arguments like this:
22466 double func (double a, double b, int c, double d) {return d;}
22468 Without this code the stab for parameter 'd' will be set to
22469 an offset of 0 from the frame pointer, rather than 8. */
22471 /* The if() statement says:
22473 If the insn is a normal instruction
22474 and if the insn is setting the value in a register
22475 and if the register being set is the register holding the address of the argument
22476 and if the address is computing by an addition
22477 that involves adding to a register
22478 which is the frame pointer
22483 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
22485 if ( NONJUMP_INSN_P (insn
)
22486 && GET_CODE (PATTERN (insn
)) == SET
22487 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
22488 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
22489 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
22490 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22491 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
22494 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
22503 warning (0, "unable to compute real location of stacked parameter");
22504 value
= 8; /* XXX magic hack */
22510 /* Implement TARGET_PROMOTED_TYPE. */
22513 arm_promoted_type (const_tree t
)
22515 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
22516 return float_type_node
;
22520 /* Implement TARGET_CONVERT_TO_TYPE.
22521 Specifically, this hook implements the peculiarity of the ARM
22522 half-precision floating-point C semantics that requires conversions between
22523 __fp16 to or from double to do an intermediate conversion to float. */
22526 arm_convert_to_type (tree type
, tree expr
)
22528 tree fromtype
= TREE_TYPE (expr
);
22529 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
22531 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
22532 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
22533 return convert (type
, convert (float_type_node
, expr
));
22537 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
22538 This simply adds HFmode as a supported mode; even though we don't
22539 implement arithmetic on this type directly, it's supported by
22540 optabs conversions, much the way the double-word arithmetic is
22541 special-cased in the default hook. */
22544 arm_scalar_mode_supported_p (machine_mode mode
)
22546 if (mode
== HFmode
)
22547 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
22548 else if (ALL_FIXED_POINT_MODE_P (mode
))
22551 return default_scalar_mode_supported_p (mode
);
22554 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
22555 not to early-clobber SRC registers in the process.
22557 We assume that the operands described by SRC and DEST represent a
22558 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
22559 number of components into which the copy has been decomposed. */
22561 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
22565 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
22566 || REGNO (operands
[0]) < REGNO (operands
[1]))
22568 for (i
= 0; i
< count
; i
++)
22570 operands
[2 * i
] = dest
[i
];
22571 operands
[2 * i
+ 1] = src
[i
];
22576 for (i
= 0; i
< count
; i
++)
22578 operands
[2 * i
] = dest
[count
- i
- 1];
22579 operands
[2 * i
+ 1] = src
[count
- i
- 1];
22584 /* Split operands into moves from op[1] + op[2] into op[0]. */
22587 neon_split_vcombine (rtx operands
[3])
22589 unsigned int dest
= REGNO (operands
[0]);
22590 unsigned int src1
= REGNO (operands
[1]);
22591 unsigned int src2
= REGNO (operands
[2]);
22592 machine_mode halfmode
= GET_MODE (operands
[1]);
22593 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
22594 rtx destlo
, desthi
;
22596 if (src1
== dest
&& src2
== dest
+ halfregs
)
22598 /* No-op move. Can't split to nothing; emit something. */
22599 emit_note (NOTE_INSN_DELETED
);
22603 /* Preserve register attributes for variable tracking. */
22604 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
22605 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
22606 GET_MODE_SIZE (halfmode
));
22608 /* Special case of reversed high/low parts. Use VSWP. */
22609 if (src2
== dest
&& src1
== dest
+ halfregs
)
22611 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
22612 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
22613 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
22617 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
22619 /* Try to avoid unnecessary moves if part of the result
22620 is in the right place already. */
22622 emit_move_insn (destlo
, operands
[1]);
22623 if (src2
!= dest
+ halfregs
)
22624 emit_move_insn (desthi
, operands
[2]);
22628 if (src2
!= dest
+ halfregs
)
22629 emit_move_insn (desthi
, operands
[2]);
22631 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
22644 /* Like emit_multi_reg_push, but allowing for a different set of
22645 registers to be described as saved. MASK is the set of registers
22646 to be saved; REAL_REGS is the set of registers to be described as
22647 saved. If REAL_REGS is 0, only describe the stack adjustment. */
22650 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
22652 unsigned long regno
;
22653 rtx par
[10], tmp
, reg
;
22657 /* Build the parallel of the registers actually being stored. */
22658 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
22660 regno
= ctz_hwi (mask
);
22661 reg
= gen_rtx_REG (SImode
, regno
);
22664 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
22666 tmp
= gen_rtx_USE (VOIDmode
, reg
);
22671 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
22672 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
22673 tmp
= gen_frame_mem (BLKmode
, tmp
);
22674 tmp
= gen_rtx_SET (tmp
, par
[0]);
22677 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
22678 insn
= emit_insn (tmp
);
22680 /* Always build the stack adjustment note for unwind info. */
22681 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
22682 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
22685 /* Build the parallel of the registers recorded as saved for unwind. */
22686 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
22688 regno
= ctz_hwi (real_regs
);
22689 reg
= gen_rtx_REG (SImode
, regno
);
22691 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
22692 tmp
= gen_frame_mem (SImode
, tmp
);
22693 tmp
= gen_rtx_SET (tmp
, reg
);
22694 RTX_FRAME_RELATED_P (tmp
) = 1;
22702 RTX_FRAME_RELATED_P (par
[0]) = 1;
22703 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
22706 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
22711 /* Emit code to push or pop registers to or from the stack. F is the
22712 assembly file. MASK is the registers to pop. */
22714 thumb_pop (FILE *f
, unsigned long mask
)
22717 int lo_mask
= mask
& 0xFF;
22718 int pushed_words
= 0;
22722 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
22724 /* Special case. Do not generate a POP PC statement here, do it in
22726 thumb_exit (f
, -1);
22730 fprintf (f
, "\tpop\t{");
22732 /* Look at the low registers first. */
22733 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
22737 asm_fprintf (f
, "%r", regno
);
22739 if ((lo_mask
& ~1) != 0)
22746 if (mask
& (1 << PC_REGNUM
))
22748 /* Catch popping the PC. */
22749 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
22750 || crtl
->calls_eh_return
)
22752 /* The PC is never poped directly, instead
22753 it is popped into r3 and then BX is used. */
22754 fprintf (f
, "}\n");
22756 thumb_exit (f
, -1);
22765 asm_fprintf (f
, "%r", PC_REGNUM
);
22769 fprintf (f
, "}\n");
22772 /* Generate code to return from a thumb function.
22773 If 'reg_containing_return_addr' is -1, then the return address is
22774 actually on the stack, at the stack pointer. */
22776 thumb_exit (FILE *f
, int reg_containing_return_addr
)
22778 unsigned regs_available_for_popping
;
22779 unsigned regs_to_pop
;
22781 unsigned available
;
22785 int restore_a4
= FALSE
;
22787 /* Compute the registers we need to pop. */
22791 if (reg_containing_return_addr
== -1)
22793 regs_to_pop
|= 1 << LR_REGNUM
;
22797 if (TARGET_BACKTRACE
)
22799 /* Restore the (ARM) frame pointer and stack pointer. */
22800 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
22804 /* If there is nothing to pop then just emit the BX instruction and
22806 if (pops_needed
== 0)
22808 if (crtl
->calls_eh_return
)
22809 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
22811 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
22814 /* Otherwise if we are not supporting interworking and we have not created
22815 a backtrace structure and the function was not entered in ARM mode then
22816 just pop the return address straight into the PC. */
22817 else if (!TARGET_INTERWORK
22818 && !TARGET_BACKTRACE
22819 && !is_called_in_ARM_mode (current_function_decl
)
22820 && !crtl
->calls_eh_return
)
22822 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
22826 /* Find out how many of the (return) argument registers we can corrupt. */
22827 regs_available_for_popping
= 0;
22829 /* If returning via __builtin_eh_return, the bottom three registers
22830 all contain information needed for the return. */
22831 if (crtl
->calls_eh_return
)
22835 /* If we can deduce the registers used from the function's
22836 return value. This is more reliable that examining
22837 df_regs_ever_live_p () because that will be set if the register is
22838 ever used in the function, not just if the register is used
22839 to hold a return value. */
22841 if (crtl
->return_rtx
!= 0)
22842 mode
= GET_MODE (crtl
->return_rtx
);
22844 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
22846 size
= GET_MODE_SIZE (mode
);
22850 /* In a void function we can use any argument register.
22851 In a function that returns a structure on the stack
22852 we can use the second and third argument registers. */
22853 if (mode
== VOIDmode
)
22854 regs_available_for_popping
=
22855 (1 << ARG_REGISTER (1))
22856 | (1 << ARG_REGISTER (2))
22857 | (1 << ARG_REGISTER (3));
22859 regs_available_for_popping
=
22860 (1 << ARG_REGISTER (2))
22861 | (1 << ARG_REGISTER (3));
22863 else if (size
<= 4)
22864 regs_available_for_popping
=
22865 (1 << ARG_REGISTER (2))
22866 | (1 << ARG_REGISTER (3));
22867 else if (size
<= 8)
22868 regs_available_for_popping
=
22869 (1 << ARG_REGISTER (3));
22872 /* Match registers to be popped with registers into which we pop them. */
22873 for (available
= regs_available_for_popping
,
22874 required
= regs_to_pop
;
22875 required
!= 0 && available
!= 0;
22876 available
&= ~(available
& - available
),
22877 required
&= ~(required
& - required
))
22880 /* If we have any popping registers left over, remove them. */
22882 regs_available_for_popping
&= ~available
;
22884 /* Otherwise if we need another popping register we can use
22885 the fourth argument register. */
22886 else if (pops_needed
)
22888 /* If we have not found any free argument registers and
22889 reg a4 contains the return address, we must move it. */
22890 if (regs_available_for_popping
== 0
22891 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
22893 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
22894 reg_containing_return_addr
= LR_REGNUM
;
22896 else if (size
> 12)
22898 /* Register a4 is being used to hold part of the return value,
22899 but we have dire need of a free, low register. */
22902 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
22905 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
22907 /* The fourth argument register is available. */
22908 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
22914 /* Pop as many registers as we can. */
22915 thumb_pop (f
, regs_available_for_popping
);
22917 /* Process the registers we popped. */
22918 if (reg_containing_return_addr
== -1)
22920 /* The return address was popped into the lowest numbered register. */
22921 regs_to_pop
&= ~(1 << LR_REGNUM
);
22923 reg_containing_return_addr
=
22924 number_of_first_bit_set (regs_available_for_popping
);
22926 /* Remove this register for the mask of available registers, so that
22927 the return address will not be corrupted by further pops. */
22928 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
22931 /* If we popped other registers then handle them here. */
22932 if (regs_available_for_popping
)
22936 /* Work out which register currently contains the frame pointer. */
22937 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
22939 /* Move it into the correct place. */
22940 asm_fprintf (f
, "\tmov\t%r, %r\n",
22941 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
22943 /* (Temporarily) remove it from the mask of popped registers. */
22944 regs_available_for_popping
&= ~(1 << frame_pointer
);
22945 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
22947 if (regs_available_for_popping
)
22951 /* We popped the stack pointer as well,
22952 find the register that contains it. */
22953 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
22955 /* Move it into the stack register. */
22956 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
22958 /* At this point we have popped all necessary registers, so
22959 do not worry about restoring regs_available_for_popping
22960 to its correct value:
22962 assert (pops_needed == 0)
22963 assert (regs_available_for_popping == (1 << frame_pointer))
22964 assert (regs_to_pop == (1 << STACK_POINTER)) */
22968 /* Since we have just move the popped value into the frame
22969 pointer, the popping register is available for reuse, and
22970 we know that we still have the stack pointer left to pop. */
22971 regs_available_for_popping
|= (1 << frame_pointer
);
22975 /* If we still have registers left on the stack, but we no longer have
22976 any registers into which we can pop them, then we must move the return
22977 address into the link register and make available the register that
22979 if (regs_available_for_popping
== 0 && pops_needed
> 0)
22981 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
22983 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
22984 reg_containing_return_addr
);
22986 reg_containing_return_addr
= LR_REGNUM
;
22989 /* If we have registers left on the stack then pop some more.
22990 We know that at most we will want to pop FP and SP. */
22991 if (pops_needed
> 0)
22996 thumb_pop (f
, regs_available_for_popping
);
22998 /* We have popped either FP or SP.
22999 Move whichever one it is into the correct register. */
23000 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
23001 move_to
= number_of_first_bit_set (regs_to_pop
);
23003 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
23005 regs_to_pop
&= ~(1 << move_to
);
23010 /* If we still have not popped everything then we must have only
23011 had one register available to us and we are now popping the SP. */
23012 if (pops_needed
> 0)
23016 thumb_pop (f
, regs_available_for_popping
);
23018 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
23020 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
23022 assert (regs_to_pop == (1 << STACK_POINTER))
23023 assert (pops_needed == 1)
23027 /* If necessary restore the a4 register. */
23030 if (reg_containing_return_addr
!= LR_REGNUM
)
23032 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23033 reg_containing_return_addr
= LR_REGNUM
;
23036 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
23039 if (crtl
->calls_eh_return
)
23040 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23042 /* Return to caller. */
23043 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23046 /* Scan INSN just before assembler is output for it.
23047 For Thumb-1, we track the status of the condition codes; this
23048 information is used in the cbranchsi4_insn pattern. */
23050 thumb1_final_prescan_insn (rtx_insn
*insn
)
23052 if (flag_print_asm_name
)
23053 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
23054 INSN_ADDRESSES (INSN_UID (insn
)));
23055 /* Don't overwrite the previous setter when we get to a cbranch. */
23056 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
23058 enum attr_conds conds
;
23060 if (cfun
->machine
->thumb1_cc_insn
)
23062 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
23063 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
23066 conds
= get_attr_conds (insn
);
23067 if (conds
== CONDS_SET
)
23069 rtx set
= single_set (insn
);
23070 cfun
->machine
->thumb1_cc_insn
= insn
;
23071 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
23072 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
23073 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
23074 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
23076 rtx src1
= XEXP (SET_SRC (set
), 1);
23077 if (src1
== const0_rtx
)
23078 cfun
->machine
->thumb1_cc_mode
= CCmode
;
23080 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
23082 /* Record the src register operand instead of dest because
23083 cprop_hardreg pass propagates src. */
23084 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
23087 else if (conds
!= CONDS_NOCOND
)
23088 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
23091 /* Check if unexpected far jump is used. */
23092 if (cfun
->machine
->lr_save_eliminated
23093 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
23094 internal_error("Unexpected thumb1 far jump");
23098 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
23100 unsigned HOST_WIDE_INT mask
= 0xff;
23103 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
23104 if (val
== 0) /* XXX */
23107 for (i
= 0; i
< 25; i
++)
23108 if ((val
& (mask
<< i
)) == val
)
23114 /* Returns nonzero if the current function contains,
23115 or might contain a far jump. */
23117 thumb_far_jump_used_p (void)
23120 bool far_jump
= false;
23121 unsigned int func_size
= 0;
23123 /* This test is only important for leaf functions. */
23124 /* assert (!leaf_function_p ()); */
23126 /* If we have already decided that far jumps may be used,
23127 do not bother checking again, and always return true even if
23128 it turns out that they are not being used. Once we have made
23129 the decision that far jumps are present (and that hence the link
23130 register will be pushed onto the stack) we cannot go back on it. */
23131 if (cfun
->machine
->far_jump_used
)
23134 /* If this function is not being called from the prologue/epilogue
23135 generation code then it must be being called from the
23136 INITIAL_ELIMINATION_OFFSET macro. */
23137 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
23139 /* In this case we know that we are being asked about the elimination
23140 of the arg pointer register. If that register is not being used,
23141 then there are no arguments on the stack, and we do not have to
23142 worry that a far jump might force the prologue to push the link
23143 register, changing the stack offsets. In this case we can just
23144 return false, since the presence of far jumps in the function will
23145 not affect stack offsets.
23147 If the arg pointer is live (or if it was live, but has now been
23148 eliminated and so set to dead) then we do have to test to see if
23149 the function might contain a far jump. This test can lead to some
23150 false negatives, since before reload is completed, then length of
23151 branch instructions is not known, so gcc defaults to returning their
23152 longest length, which in turn sets the far jump attribute to true.
23154 A false negative will not result in bad code being generated, but it
23155 will result in a needless push and pop of the link register. We
23156 hope that this does not occur too often.
23158 If we need doubleword stack alignment this could affect the other
23159 elimination offsets so we can't risk getting it wrong. */
23160 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
23161 cfun
->machine
->arg_pointer_live
= 1;
23162 else if (!cfun
->machine
->arg_pointer_live
)
23166 /* We should not change far_jump_used during or after reload, as there is
23167 no chance to change stack frame layout. */
23168 if (reload_in_progress
|| reload_completed
)
23171 /* Check to see if the function contains a branch
23172 insn with the far jump attribute set. */
23173 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23175 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
23179 func_size
+= get_attr_length (insn
);
23182 /* Attribute far_jump will always be true for thumb1 before
23183 shorten_branch pass. So checking far_jump attribute before
23184 shorten_branch isn't much useful.
23186 Following heuristic tries to estimate more accurately if a far jump
23187 may finally be used. The heuristic is very conservative as there is
23188 no chance to roll-back the decision of not to use far jump.
23190 Thumb1 long branch offset is -2048 to 2046. The worst case is each
23191 2-byte insn is associated with a 4 byte constant pool. Using
23192 function size 2048/3 as the threshold is conservative enough. */
23195 if ((func_size
* 3) >= 2048)
23197 /* Record the fact that we have decided that
23198 the function does use far jumps. */
23199 cfun
->machine
->far_jump_used
= 1;
23207 /* Return nonzero if FUNC must be entered in ARM mode. */
23209 is_called_in_ARM_mode (tree func
)
23211 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
23213 /* Ignore the problem about functions whose address is taken. */
23214 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
23218 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
23224 /* Given the stack offsets and register mask in OFFSETS, decide how
23225 many additional registers to push instead of subtracting a constant
23226 from SP. For epilogues the principle is the same except we use pop.
23227 FOR_PROLOGUE indicates which we're generating. */
23229 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
23231 HOST_WIDE_INT amount
;
23232 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
23233 /* Extract a mask of the ones we can give to the Thumb's push/pop
23235 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
23236 /* Then count how many other high registers will need to be pushed. */
23237 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
23238 int n_free
, reg_base
, size
;
23240 if (!for_prologue
&& frame_pointer_needed
)
23241 amount
= offsets
->locals_base
- offsets
->saved_regs
;
23243 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
23245 /* If the stack frame size is 512 exactly, we can save one load
23246 instruction, which should make this a win even when optimizing
23248 if (!optimize_size
&& amount
!= 512)
23251 /* Can't do this if there are high registers to push. */
23252 if (high_regs_pushed
!= 0)
23255 /* Shouldn't do it in the prologue if no registers would normally
23256 be pushed at all. In the epilogue, also allow it if we'll have
23257 a pop insn for the PC. */
23260 || TARGET_BACKTRACE
23261 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
23262 || TARGET_INTERWORK
23263 || crtl
->args
.pretend_args_size
!= 0))
23266 /* Don't do this if thumb_expand_prologue wants to emit instructions
23267 between the push and the stack frame allocation. */
23269 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
23270 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
23277 size
= arm_size_return_regs ();
23278 reg_base
= ARM_NUM_INTS (size
);
23279 live_regs_mask
>>= reg_base
;
23282 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
23283 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
23285 live_regs_mask
>>= 1;
23291 gcc_assert (amount
/ 4 * 4 == amount
);
23293 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
23294 return (amount
- 508) / 4;
23295 if (amount
<= n_free
* 4)
23300 /* The bits which aren't usefully expanded as rtl. */
23302 thumb1_unexpanded_epilogue (void)
23304 arm_stack_offsets
*offsets
;
23306 unsigned long live_regs_mask
= 0;
23307 int high_regs_pushed
= 0;
23309 int had_to_push_lr
;
23312 if (cfun
->machine
->return_used_this_function
!= 0)
23315 if (IS_NAKED (arm_current_func_type ()))
23318 offsets
= arm_get_frame_offsets ();
23319 live_regs_mask
= offsets
->saved_regs_mask
;
23320 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
23322 /* If we can deduce the registers used from the function's return value.
23323 This is more reliable that examining df_regs_ever_live_p () because that
23324 will be set if the register is ever used in the function, not just if
23325 the register is used to hold a return value. */
23326 size
= arm_size_return_regs ();
23328 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
23331 unsigned long extra_mask
= (1 << extra_pop
) - 1;
23332 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
23335 /* The prolog may have pushed some high registers to use as
23336 work registers. e.g. the testsuite file:
23337 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
23338 compiles to produce:
23339 push {r4, r5, r6, r7, lr}
23343 as part of the prolog. We have to undo that pushing here. */
23345 if (high_regs_pushed
)
23347 unsigned long mask
= live_regs_mask
& 0xff;
23350 /* The available low registers depend on the size of the value we are
23358 /* Oh dear! We have no low registers into which we can pop
23361 ("no low registers available for popping high registers");
23363 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
23364 if (live_regs_mask
& (1 << next_hi_reg
))
23367 while (high_regs_pushed
)
23369 /* Find lo register(s) into which the high register(s) can
23371 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
23373 if (mask
& (1 << regno
))
23374 high_regs_pushed
--;
23375 if (high_regs_pushed
== 0)
23379 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
23381 /* Pop the values into the low register(s). */
23382 thumb_pop (asm_out_file
, mask
);
23384 /* Move the value(s) into the high registers. */
23385 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
23387 if (mask
& (1 << regno
))
23389 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
23392 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
23393 if (live_regs_mask
& (1 << next_hi_reg
))
23398 live_regs_mask
&= ~0x0f00;
23401 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
23402 live_regs_mask
&= 0xff;
23404 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
23406 /* Pop the return address into the PC. */
23407 if (had_to_push_lr
)
23408 live_regs_mask
|= 1 << PC_REGNUM
;
23410 /* Either no argument registers were pushed or a backtrace
23411 structure was created which includes an adjusted stack
23412 pointer, so just pop everything. */
23413 if (live_regs_mask
)
23414 thumb_pop (asm_out_file
, live_regs_mask
);
23416 /* We have either just popped the return address into the
23417 PC or it is was kept in LR for the entire function.
23418 Note that thumb_pop has already called thumb_exit if the
23419 PC was in the list. */
23420 if (!had_to_push_lr
)
23421 thumb_exit (asm_out_file
, LR_REGNUM
);
23425 /* Pop everything but the return address. */
23426 if (live_regs_mask
)
23427 thumb_pop (asm_out_file
, live_regs_mask
);
23429 if (had_to_push_lr
)
23433 /* We have no free low regs, so save one. */
23434 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
23438 /* Get the return address into a temporary register. */
23439 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
23443 /* Move the return address to lr. */
23444 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
23446 /* Restore the low register. */
23447 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
23452 regno
= LAST_ARG_REGNUM
;
23457 /* Remove the argument registers that were pushed onto the stack. */
23458 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
23459 SP_REGNUM
, SP_REGNUM
,
23460 crtl
->args
.pretend_args_size
);
23462 thumb_exit (asm_out_file
, regno
);
23468 /* Functions to save and restore machine-specific function data. */
23469 static struct machine_function
*
23470 arm_init_machine_status (void)
23472 struct machine_function
*machine
;
23473 machine
= ggc_cleared_alloc
<machine_function
> ();
23475 #if ARM_FT_UNKNOWN != 0
23476 machine
->func_type
= ARM_FT_UNKNOWN
;
23481 /* Return an RTX indicating where the return address to the
23482 calling function can be found. */
23484 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
23489 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
23492 /* Do anything needed before RTL is emitted for each function. */
23494 arm_init_expanders (void)
23496 /* Arrange to initialize and mark the machine per-function status. */
23497 init_machine_status
= arm_init_machine_status
;
23499 /* This is to stop the combine pass optimizing away the alignment
23500 adjustment of va_arg. */
23501 /* ??? It is claimed that this should not be necessary. */
23503 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
23506 /* Check that FUNC is called with a different mode. */
23509 arm_change_mode_p (tree func
)
23511 if (TREE_CODE (func
) != FUNCTION_DECL
)
23514 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
23517 callee_tree
= target_option_default_node
;
23519 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
23520 int flags
= callee_opts
->x_target_flags
;
23522 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
23525 /* Like arm_compute_initial_elimination offset. Simpler because there
23526 isn't an ABI specified frame pointer for Thumb. Instead, we set it
23527 to point at the base of the local variables after static stack
23528 space for a function has been allocated. */
23531 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
23533 arm_stack_offsets
*offsets
;
23535 offsets
= arm_get_frame_offsets ();
23539 case ARG_POINTER_REGNUM
:
23542 case STACK_POINTER_REGNUM
:
23543 return offsets
->outgoing_args
- offsets
->saved_args
;
23545 case FRAME_POINTER_REGNUM
:
23546 return offsets
->soft_frame
- offsets
->saved_args
;
23548 case ARM_HARD_FRAME_POINTER_REGNUM
:
23549 return offsets
->saved_regs
- offsets
->saved_args
;
23551 case THUMB_HARD_FRAME_POINTER_REGNUM
:
23552 return offsets
->locals_base
- offsets
->saved_args
;
23555 gcc_unreachable ();
23559 case FRAME_POINTER_REGNUM
:
23562 case STACK_POINTER_REGNUM
:
23563 return offsets
->outgoing_args
- offsets
->soft_frame
;
23565 case ARM_HARD_FRAME_POINTER_REGNUM
:
23566 return offsets
->saved_regs
- offsets
->soft_frame
;
23568 case THUMB_HARD_FRAME_POINTER_REGNUM
:
23569 return offsets
->locals_base
- offsets
->soft_frame
;
23572 gcc_unreachable ();
23577 gcc_unreachable ();
23581 /* Generate the function's prologue. */
23584 thumb1_expand_prologue (void)
23588 HOST_WIDE_INT amount
;
23589 HOST_WIDE_INT size
;
23590 arm_stack_offsets
*offsets
;
23591 unsigned long func_type
;
23593 unsigned long live_regs_mask
;
23594 unsigned long l_mask
;
23595 unsigned high_regs_pushed
= 0;
23596 bool lr_needs_saving
;
23598 func_type
= arm_current_func_type ();
23600 /* Naked functions don't have prologues. */
23601 if (IS_NAKED (func_type
))
23603 if (flag_stack_usage_info
)
23604 current_function_static_stack_size
= 0;
23608 if (IS_INTERRUPT (func_type
))
23610 error ("interrupt Service Routines cannot be coded in Thumb mode");
23614 if (is_called_in_ARM_mode (current_function_decl
))
23615 emit_insn (gen_prologue_thumb1_interwork ());
23617 offsets
= arm_get_frame_offsets ();
23618 live_regs_mask
= offsets
->saved_regs_mask
;
23619 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
23621 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
23622 l_mask
= live_regs_mask
& 0x40ff;
23623 /* Then count how many other high registers will need to be pushed. */
23624 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
23626 if (crtl
->args
.pretend_args_size
)
23628 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
23630 if (cfun
->machine
->uses_anonymous_args
)
23632 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
23633 unsigned long mask
;
23635 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
23636 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
23638 insn
= thumb1_emit_multi_reg_push (mask
, 0);
23642 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
23643 stack_pointer_rtx
, x
));
23645 RTX_FRAME_RELATED_P (insn
) = 1;
23648 if (TARGET_BACKTRACE
)
23650 HOST_WIDE_INT offset
= 0;
23651 unsigned work_register
;
23652 rtx work_reg
, x
, arm_hfp_rtx
;
23654 /* We have been asked to create a stack backtrace structure.
23655 The code looks like this:
23659 0 sub SP, #16 Reserve space for 4 registers.
23660 2 push {R7} Push low registers.
23661 4 add R7, SP, #20 Get the stack pointer before the push.
23662 6 str R7, [SP, #8] Store the stack pointer
23663 (before reserving the space).
23664 8 mov R7, PC Get hold of the start of this code + 12.
23665 10 str R7, [SP, #16] Store it.
23666 12 mov R7, FP Get hold of the current frame pointer.
23667 14 str R7, [SP, #4] Store it.
23668 16 mov R7, LR Get hold of the current return address.
23669 18 str R7, [SP, #12] Store it.
23670 20 add R7, SP, #16 Point at the start of the
23671 backtrace structure.
23672 22 mov FP, R7 Put this value into the frame pointer. */
23674 work_register
= thumb_find_work_register (live_regs_mask
);
23675 work_reg
= gen_rtx_REG (SImode
, work_register
);
23676 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
23678 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
23679 stack_pointer_rtx
, GEN_INT (-16)));
23680 RTX_FRAME_RELATED_P (insn
) = 1;
23684 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
23685 RTX_FRAME_RELATED_P (insn
) = 1;
23686 lr_needs_saving
= false;
23688 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
23691 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
23692 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
23694 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
23695 x
= gen_frame_mem (SImode
, x
);
23696 emit_move_insn (x
, work_reg
);
23698 /* Make sure that the instruction fetching the PC is in the right place
23699 to calculate "start of backtrace creation code + 12". */
23700 /* ??? The stores using the common WORK_REG ought to be enough to
23701 prevent the scheduler from doing anything weird. Failing that
23702 we could always move all of the following into an UNSPEC_VOLATILE. */
23705 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
23706 emit_move_insn (work_reg
, x
);
23708 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
23709 x
= gen_frame_mem (SImode
, x
);
23710 emit_move_insn (x
, work_reg
);
23712 emit_move_insn (work_reg
, arm_hfp_rtx
);
23714 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
23715 x
= gen_frame_mem (SImode
, x
);
23716 emit_move_insn (x
, work_reg
);
23720 emit_move_insn (work_reg
, arm_hfp_rtx
);
23722 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
23723 x
= gen_frame_mem (SImode
, x
);
23724 emit_move_insn (x
, work_reg
);
23726 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
23727 emit_move_insn (work_reg
, x
);
23729 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
23730 x
= gen_frame_mem (SImode
, x
);
23731 emit_move_insn (x
, work_reg
);
23734 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
23735 emit_move_insn (work_reg
, x
);
23737 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
23738 x
= gen_frame_mem (SImode
, x
);
23739 emit_move_insn (x
, work_reg
);
23741 x
= GEN_INT (offset
+ 12);
23742 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
23744 emit_move_insn (arm_hfp_rtx
, work_reg
);
23746 /* Optimization: If we are not pushing any low registers but we are going
23747 to push some high registers then delay our first push. This will just
23748 be a push of LR and we can combine it with the push of the first high
23750 else if ((l_mask
& 0xff) != 0
23751 || (high_regs_pushed
== 0 && lr_needs_saving
))
23753 unsigned long mask
= l_mask
;
23754 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
23755 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
23756 RTX_FRAME_RELATED_P (insn
) = 1;
23757 lr_needs_saving
= false;
23760 if (high_regs_pushed
)
23762 unsigned pushable_regs
;
23763 unsigned next_hi_reg
;
23764 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
23765 : crtl
->args
.info
.nregs
;
23766 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
23768 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
23769 if (live_regs_mask
& (1 << next_hi_reg
))
23772 /* Here we need to mask out registers used for passing arguments
23773 even if they can be pushed. This is to avoid using them to stash the high
23774 registers. Such kind of stash may clobber the use of arguments. */
23775 pushable_regs
= l_mask
& (~arg_regs_mask
);
23776 if (lr_needs_saving
)
23777 pushable_regs
&= ~(1 << LR_REGNUM
);
23779 if (pushable_regs
== 0)
23780 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
23782 while (high_regs_pushed
> 0)
23784 unsigned long real_regs_mask
= 0;
23785 unsigned long push_mask
= 0;
23787 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
23789 if (pushable_regs
& (1 << regno
))
23791 emit_move_insn (gen_rtx_REG (SImode
, regno
),
23792 gen_rtx_REG (SImode
, next_hi_reg
));
23794 high_regs_pushed
--;
23795 real_regs_mask
|= (1 << next_hi_reg
);
23796 push_mask
|= (1 << regno
);
23798 if (high_regs_pushed
)
23800 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
23802 if (live_regs_mask
& (1 << next_hi_reg
))
23810 /* If we had to find a work register and we have not yet
23811 saved the LR then add it to the list of regs to push. */
23812 if (lr_needs_saving
)
23814 push_mask
|= 1 << LR_REGNUM
;
23815 real_regs_mask
|= 1 << LR_REGNUM
;
23816 lr_needs_saving
= false;
23819 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
23820 RTX_FRAME_RELATED_P (insn
) = 1;
23824 /* Load the pic register before setting the frame pointer,
23825 so we can use r7 as a temporary work register. */
23826 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
23827 arm_load_pic_register (live_regs_mask
);
23829 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
23830 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
23831 stack_pointer_rtx
);
23833 size
= offsets
->outgoing_args
- offsets
->saved_args
;
23834 if (flag_stack_usage_info
)
23835 current_function_static_stack_size
= size
;
23837 /* If we have a frame, then do stack checking. FIXME: not implemented. */
23838 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
23839 sorry ("-fstack-check=specific for Thumb-1");
23841 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
23842 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
23847 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23848 GEN_INT (- amount
)));
23849 RTX_FRAME_RELATED_P (insn
) = 1;
23855 /* The stack decrement is too big for an immediate value in a single
23856 insn. In theory we could issue multiple subtracts, but after
23857 three of them it becomes more space efficient to place the full
23858 value in the constant pool and load into a register. (Also the
23859 ARM debugger really likes to see only one stack decrement per
23860 function). So instead we look for a scratch register into which
23861 we can load the decrement, and then we subtract this from the
23862 stack pointer. Unfortunately on the thumb the only available
23863 scratch registers are the argument registers, and we cannot use
23864 these as they may hold arguments to the function. Instead we
23865 attempt to locate a call preserved register which is used by this
23866 function. If we can find one, then we know that it will have
23867 been pushed at the start of the prologue and so we can corrupt
23869 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
23870 if (live_regs_mask
& (1 << regno
))
23873 gcc_assert(regno
<= LAST_LO_REGNUM
);
23875 reg
= gen_rtx_REG (SImode
, regno
);
23877 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
23879 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
23880 stack_pointer_rtx
, reg
));
23882 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23883 plus_constant (Pmode
, stack_pointer_rtx
,
23885 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23886 RTX_FRAME_RELATED_P (insn
) = 1;
23890 if (frame_pointer_needed
)
23891 thumb_set_frame_pointer (offsets
);
23893 /* If we are profiling, make sure no instructions are scheduled before
23894 the call to mcount. Similarly if the user has requested no
23895 scheduling in the prolog. Similarly if we want non-call exceptions
23896 using the EABI unwinder, to prevent faulting instructions from being
23897 swapped with a stack adjustment. */
23898 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
23899 || (arm_except_unwind_info (&global_options
) == UI_TARGET
23900 && cfun
->can_throw_non_call_exceptions
))
23901 emit_insn (gen_blockage ());
23903 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
23904 if (live_regs_mask
& 0xff)
23905 cfun
->machine
->lr_save_eliminated
= 0;
23908 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
23909 POP instruction can be generated. LR should be replaced by PC. All
23910 the checks required are already done by USE_RETURN_INSN (). Hence,
23911 all we really need to check here is if single register is to be
23912 returned, or multiple register return. */
23914 thumb2_expand_return (bool simple_return
)
23917 unsigned long saved_regs_mask
;
23918 arm_stack_offsets
*offsets
;
23920 offsets
= arm_get_frame_offsets ();
23921 saved_regs_mask
= offsets
->saved_regs_mask
;
23923 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
23924 if (saved_regs_mask
& (1 << i
))
23927 if (!simple_return
&& saved_regs_mask
)
23931 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
23932 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
23933 rtx addr
= gen_rtx_MEM (SImode
,
23934 gen_rtx_POST_INC (SImode
,
23935 stack_pointer_rtx
));
23936 set_mem_alias_set (addr
, get_frame_alias_set ());
23937 XVECEXP (par
, 0, 0) = ret_rtx
;
23938 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
23939 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
23940 emit_jump_insn (par
);
23944 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
23945 saved_regs_mask
|= (1 << PC_REGNUM
);
23946 arm_emit_multi_reg_pop (saved_regs_mask
);
23951 emit_jump_insn (simple_return_rtx
);
23956 thumb1_expand_epilogue (void)
23958 HOST_WIDE_INT amount
;
23959 arm_stack_offsets
*offsets
;
23962 /* Naked functions don't have prologues. */
23963 if (IS_NAKED (arm_current_func_type ()))
23966 offsets
= arm_get_frame_offsets ();
23967 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
23969 if (frame_pointer_needed
)
23971 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
23972 amount
= offsets
->locals_base
- offsets
->saved_regs
;
23974 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
23976 gcc_assert (amount
>= 0);
23979 emit_insn (gen_blockage ());
23982 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23983 GEN_INT (amount
)));
23986 /* r3 is always free in the epilogue. */
23987 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
23989 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
23990 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
23994 /* Emit a USE (stack_pointer_rtx), so that
23995 the stack adjustment will not be deleted. */
23996 emit_insn (gen_force_register_use (stack_pointer_rtx
));
23998 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
23999 emit_insn (gen_blockage ());
24001 /* Emit a clobber for each insn that will be restored in the epilogue,
24002 so that flow2 will get register lifetimes correct. */
24003 for (regno
= 0; regno
< 13; regno
++)
24004 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
24005 emit_clobber (gen_rtx_REG (SImode
, regno
));
24007 if (! df_regs_ever_live_p (LR_REGNUM
))
24008 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
24011 /* Epilogue code for APCS frame. */
24013 arm_expand_epilogue_apcs_frame (bool really_return
)
24015 unsigned long func_type
;
24016 unsigned long saved_regs_mask
;
24019 int floats_from_frame
= 0;
24020 arm_stack_offsets
*offsets
;
24022 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
24023 func_type
= arm_current_func_type ();
24025 /* Get frame offsets for ARM. */
24026 offsets
= arm_get_frame_offsets ();
24027 saved_regs_mask
= offsets
->saved_regs_mask
;
24029 /* Find the offset of the floating-point save area in the frame. */
24031 = (offsets
->saved_args
24032 + arm_compute_static_chain_stack_bytes ()
24035 /* Compute how many core registers saved and how far away the floats are. */
24036 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
24037 if (saved_regs_mask
& (1 << i
))
24040 floats_from_frame
+= 4;
24043 if (TARGET_HARD_FLOAT
)
24046 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
24048 /* The offset is from IP_REGNUM. */
24049 int saved_size
= arm_get_vfp_saved_size ();
24050 if (saved_size
> 0)
24053 floats_from_frame
+= saved_size
;
24054 insn
= emit_insn (gen_addsi3 (ip_rtx
,
24055 hard_frame_pointer_rtx
,
24056 GEN_INT (-floats_from_frame
)));
24057 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
24058 ip_rtx
, hard_frame_pointer_rtx
);
24061 /* Generate VFP register multi-pop. */
24062 start_reg
= FIRST_VFP_REGNUM
;
24064 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
24065 /* Look for a case where a reg does not need restoring. */
24066 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
24067 && (!df_regs_ever_live_p (i
+ 1)
24068 || call_used_regs
[i
+ 1]))
24070 if (start_reg
!= i
)
24071 arm_emit_vfp_multi_reg_pop (start_reg
,
24072 (i
- start_reg
) / 2,
24073 gen_rtx_REG (SImode
,
24078 /* Restore the remaining regs that we have discovered (or possibly
24079 even all of them, if the conditional in the for loop never
24081 if (start_reg
!= i
)
24082 arm_emit_vfp_multi_reg_pop (start_reg
,
24083 (i
- start_reg
) / 2,
24084 gen_rtx_REG (SImode
, IP_REGNUM
));
24089 /* The frame pointer is guaranteed to be non-double-word aligned, as
24090 it is set to double-word-aligned old_stack_pointer - 4. */
24092 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
24094 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
24095 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
24097 rtx addr
= gen_frame_mem (V2SImode
,
24098 plus_constant (Pmode
, hard_frame_pointer_rtx
,
24100 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
24101 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24102 gen_rtx_REG (V2SImode
, i
),
24108 /* saved_regs_mask should contain IP which contains old stack pointer
24109 at the time of activation creation. Since SP and IP are adjacent registers,
24110 we can restore the value directly into SP. */
24111 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
24112 saved_regs_mask
&= ~(1 << IP_REGNUM
);
24113 saved_regs_mask
|= (1 << SP_REGNUM
);
24115 /* There are two registers left in saved_regs_mask - LR and PC. We
24116 only need to restore LR (the return address), but to
24117 save time we can load it directly into PC, unless we need a
24118 special function exit sequence, or we are not really returning. */
24120 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
24121 && !crtl
->calls_eh_return
)
24122 /* Delete LR from the register mask, so that LR on
24123 the stack is loaded into the PC in the register mask. */
24124 saved_regs_mask
&= ~(1 << LR_REGNUM
);
24126 saved_regs_mask
&= ~(1 << PC_REGNUM
);
24128 num_regs
= bit_count (saved_regs_mask
);
24129 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
24132 emit_insn (gen_blockage ());
24133 /* Unwind the stack to just below the saved registers. */
24134 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24135 hard_frame_pointer_rtx
,
24136 GEN_INT (- 4 * num_regs
)));
24138 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
24139 stack_pointer_rtx
, hard_frame_pointer_rtx
);
24142 arm_emit_multi_reg_pop (saved_regs_mask
);
24144 if (IS_INTERRUPT (func_type
))
24146 /* Interrupt handlers will have pushed the
24147 IP onto the stack, so restore it now. */
24149 rtx addr
= gen_rtx_MEM (SImode
,
24150 gen_rtx_POST_INC (SImode
,
24151 stack_pointer_rtx
));
24152 set_mem_alias_set (addr
, get_frame_alias_set ());
24153 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
24154 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24155 gen_rtx_REG (SImode
, IP_REGNUM
),
24159 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
24162 if (crtl
->calls_eh_return
)
24163 emit_insn (gen_addsi3 (stack_pointer_rtx
,
24165 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
24167 if (IS_STACKALIGN (func_type
))
24168 /* Restore the original stack pointer. Before prologue, the stack was
24169 realigned and the original stack pointer saved in r0. For details,
24170 see comment in arm_expand_prologue. */
24171 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
24173 emit_jump_insn (simple_return_rtx
);
24176 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24177 function is not a sibcall. */
24179 arm_expand_epilogue (bool really_return
)
24181 unsigned long func_type
;
24182 unsigned long saved_regs_mask
;
24186 arm_stack_offsets
*offsets
;
24188 func_type
= arm_current_func_type ();
24190 /* Naked functions don't have epilogue. Hence, generate return pattern, and
24191 let output_return_instruction take care of instruction emission if any. */
24192 if (IS_NAKED (func_type
)
24193 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
24196 emit_jump_insn (simple_return_rtx
);
24200 /* If we are throwing an exception, then we really must be doing a
24201 return, so we can't tail-call. */
24202 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
24204 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
24206 arm_expand_epilogue_apcs_frame (really_return
);
24210 /* Get frame offsets for ARM. */
24211 offsets
= arm_get_frame_offsets ();
24212 saved_regs_mask
= offsets
->saved_regs_mask
;
24213 num_regs
= bit_count (saved_regs_mask
);
24215 if (frame_pointer_needed
)
24218 /* Restore stack pointer if necessary. */
24221 /* In ARM mode, frame pointer points to first saved register.
24222 Restore stack pointer to last saved register. */
24223 amount
= offsets
->frame
- offsets
->saved_regs
;
24225 /* Force out any pending memory operations that reference stacked data
24226 before stack de-allocation occurs. */
24227 emit_insn (gen_blockage ());
24228 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24229 hard_frame_pointer_rtx
,
24230 GEN_INT (amount
)));
24231 arm_add_cfa_adjust_cfa_note (insn
, amount
,
24233 hard_frame_pointer_rtx
);
24235 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24237 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24241 /* In Thumb-2 mode, the frame pointer points to the last saved
24243 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24246 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
24247 hard_frame_pointer_rtx
,
24248 GEN_INT (amount
)));
24249 arm_add_cfa_adjust_cfa_note (insn
, amount
,
24250 hard_frame_pointer_rtx
,
24251 hard_frame_pointer_rtx
);
24254 /* Force out any pending memory operations that reference stacked data
24255 before stack de-allocation occurs. */
24256 emit_insn (gen_blockage ());
24257 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
24258 hard_frame_pointer_rtx
));
24259 arm_add_cfa_adjust_cfa_note (insn
, 0,
24261 hard_frame_pointer_rtx
);
24262 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24264 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24269 /* Pop off outgoing args and local frame to adjust stack pointer to
24270 last saved register. */
24271 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24275 /* Force out any pending memory operations that reference stacked data
24276 before stack de-allocation occurs. */
24277 emit_insn (gen_blockage ());
24278 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24280 GEN_INT (amount
)));
24281 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
24282 stack_pointer_rtx
, stack_pointer_rtx
);
24283 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24285 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24289 if (TARGET_HARD_FLOAT
)
24291 /* Generate VFP register multi-pop. */
24292 int end_reg
= LAST_VFP_REGNUM
+ 1;
24294 /* Scan the registers in reverse order. We need to match
24295 any groupings made in the prologue and generate matching
24296 vldm operations. The need to match groups is because,
24297 unlike pop, vldm can only do consecutive regs. */
24298 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
24299 /* Look for a case where a reg does not need restoring. */
24300 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
24301 && (!df_regs_ever_live_p (i
+ 1)
24302 || call_used_regs
[i
+ 1]))
24304 /* Restore the regs discovered so far (from reg+2 to
24306 if (end_reg
> i
+ 2)
24307 arm_emit_vfp_multi_reg_pop (i
+ 2,
24308 (end_reg
- (i
+ 2)) / 2,
24309 stack_pointer_rtx
);
24313 /* Restore the remaining regs that we have discovered (or possibly
24314 even all of them, if the conditional in the for loop never
24316 if (end_reg
> i
+ 2)
24317 arm_emit_vfp_multi_reg_pop (i
+ 2,
24318 (end_reg
- (i
+ 2)) / 2,
24319 stack_pointer_rtx
);
24323 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
24324 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
24327 rtx addr
= gen_rtx_MEM (V2SImode
,
24328 gen_rtx_POST_INC (SImode
,
24329 stack_pointer_rtx
));
24330 set_mem_alias_set (addr
, get_frame_alias_set ());
24331 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
24332 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24333 gen_rtx_REG (V2SImode
, i
),
24335 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
24336 stack_pointer_rtx
, stack_pointer_rtx
);
24339 if (saved_regs_mask
)
24342 bool return_in_pc
= false;
24344 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
24345 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
24346 && !IS_STACKALIGN (func_type
)
24348 && crtl
->args
.pretend_args_size
== 0
24349 && saved_regs_mask
& (1 << LR_REGNUM
)
24350 && !crtl
->calls_eh_return
)
24352 saved_regs_mask
&= ~(1 << LR_REGNUM
);
24353 saved_regs_mask
|= (1 << PC_REGNUM
);
24354 return_in_pc
= true;
24357 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
24359 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
24360 if (saved_regs_mask
& (1 << i
))
24362 rtx addr
= gen_rtx_MEM (SImode
,
24363 gen_rtx_POST_INC (SImode
,
24364 stack_pointer_rtx
));
24365 set_mem_alias_set (addr
, get_frame_alias_set ());
24367 if (i
== PC_REGNUM
)
24369 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
24370 XVECEXP (insn
, 0, 0) = ret_rtx
;
24371 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
24373 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
24374 insn
= emit_jump_insn (insn
);
24378 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
24380 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24381 gen_rtx_REG (SImode
, i
),
24383 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
24385 stack_pointer_rtx
);
24392 && current_tune
->prefer_ldrd_strd
24393 && !optimize_function_for_size_p (cfun
))
24396 thumb2_emit_ldrd_pop (saved_regs_mask
);
24397 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
24398 arm_emit_ldrd_pop (saved_regs_mask
);
24400 arm_emit_multi_reg_pop (saved_regs_mask
);
24403 arm_emit_multi_reg_pop (saved_regs_mask
);
24411 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
24415 rtx dwarf
= NULL_RTX
;
24417 emit_insn (gen_addsi3 (stack_pointer_rtx
,
24419 GEN_INT (amount
)));
24421 RTX_FRAME_RELATED_P (tmp
) = 1;
24423 if (cfun
->machine
->uses_anonymous_args
)
24425 /* Restore pretend args. Refer arm_expand_prologue on how to save
24426 pretend_args in stack. */
24427 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
24428 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
24429 for (j
= 0, i
= 0; j
< num_regs
; i
++)
24430 if (saved_regs_mask
& (1 << i
))
24432 rtx reg
= gen_rtx_REG (SImode
, i
);
24433 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
24436 REG_NOTES (tmp
) = dwarf
;
24438 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
24439 stack_pointer_rtx
, stack_pointer_rtx
);
24442 if (!really_return
)
24445 if (crtl
->calls_eh_return
)
24446 emit_insn (gen_addsi3 (stack_pointer_rtx
,
24448 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
24450 if (IS_STACKALIGN (func_type
))
24451 /* Restore the original stack pointer. Before prologue, the stack was
24452 realigned and the original stack pointer saved in r0. For details,
24453 see comment in arm_expand_prologue. */
24454 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
24456 emit_jump_insn (simple_return_rtx
);
24459 /* Implementation of insn prologue_thumb1_interwork. This is the first
24460 "instruction" of a function called in ARM mode. Swap to thumb mode. */
24463 thumb1_output_interwork (void)
24466 FILE *f
= asm_out_file
;
24468 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
24469 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
24471 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
24473 /* Generate code sequence to switch us into Thumb mode. */
24474 /* The .code 32 directive has already been emitted by
24475 ASM_DECLARE_FUNCTION_NAME. */
24476 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
24477 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
24479 /* Generate a label, so that the debugger will notice the
24480 change in instruction sets. This label is also used by
24481 the assembler to bypass the ARM code when this function
24482 is called from a Thumb encoded function elsewhere in the
24483 same file. Hence the definition of STUB_NAME here must
24484 agree with the definition in gas/config/tc-arm.c. */
24486 #define STUB_NAME ".real_start_of"
24488 fprintf (f
, "\t.code\t16\n");
24490 if (arm_dllexport_name_p (name
))
24491 name
= arm_strip_name_encoding (name
);
24493 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
24494 fprintf (f
, "\t.thumb_func\n");
24495 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
24500 /* Handle the case of a double word load into a low register from
24501 a computed memory address. The computed address may involve a
24502 register which is overwritten by the load. */
24504 thumb_load_double_from_address (rtx
*operands
)
24512 gcc_assert (REG_P (operands
[0]));
24513 gcc_assert (MEM_P (operands
[1]));
24515 /* Get the memory address. */
24516 addr
= XEXP (operands
[1], 0);
24518 /* Work out how the memory address is computed. */
24519 switch (GET_CODE (addr
))
24522 operands
[2] = adjust_address (operands
[1], SImode
, 4);
24524 if (REGNO (operands
[0]) == REGNO (addr
))
24526 output_asm_insn ("ldr\t%H0, %2", operands
);
24527 output_asm_insn ("ldr\t%0, %1", operands
);
24531 output_asm_insn ("ldr\t%0, %1", operands
);
24532 output_asm_insn ("ldr\t%H0, %2", operands
);
24537 /* Compute <address> + 4 for the high order load. */
24538 operands
[2] = adjust_address (operands
[1], SImode
, 4);
24540 output_asm_insn ("ldr\t%0, %1", operands
);
24541 output_asm_insn ("ldr\t%H0, %2", operands
);
24545 arg1
= XEXP (addr
, 0);
24546 arg2
= XEXP (addr
, 1);
24548 if (CONSTANT_P (arg1
))
24549 base
= arg2
, offset
= arg1
;
24551 base
= arg1
, offset
= arg2
;
24553 gcc_assert (REG_P (base
));
24555 /* Catch the case of <address> = <reg> + <reg> */
24556 if (REG_P (offset
))
24558 int reg_offset
= REGNO (offset
);
24559 int reg_base
= REGNO (base
);
24560 int reg_dest
= REGNO (operands
[0]);
24562 /* Add the base and offset registers together into the
24563 higher destination register. */
24564 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
24565 reg_dest
+ 1, reg_base
, reg_offset
);
24567 /* Load the lower destination register from the address in
24568 the higher destination register. */
24569 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
24570 reg_dest
, reg_dest
+ 1);
24572 /* Load the higher destination register from its own address
24574 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
24575 reg_dest
+ 1, reg_dest
+ 1);
24579 /* Compute <address> + 4 for the high order load. */
24580 operands
[2] = adjust_address (operands
[1], SImode
, 4);
24582 /* If the computed address is held in the low order register
24583 then load the high order register first, otherwise always
24584 load the low order register first. */
24585 if (REGNO (operands
[0]) == REGNO (base
))
24587 output_asm_insn ("ldr\t%H0, %2", operands
);
24588 output_asm_insn ("ldr\t%0, %1", operands
);
24592 output_asm_insn ("ldr\t%0, %1", operands
);
24593 output_asm_insn ("ldr\t%H0, %2", operands
);
24599 /* With no registers to worry about we can just load the value
24601 operands
[2] = adjust_address (operands
[1], SImode
, 4);
24603 output_asm_insn ("ldr\t%H0, %2", operands
);
24604 output_asm_insn ("ldr\t%0, %1", operands
);
24608 gcc_unreachable ();
24615 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
24620 if (REGNO (operands
[4]) > REGNO (operands
[5]))
24621 std::swap (operands
[4], operands
[5]);
24623 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
24624 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
24628 if (REGNO (operands
[4]) > REGNO (operands
[5]))
24629 std::swap (operands
[4], operands
[5]);
24630 if (REGNO (operands
[5]) > REGNO (operands
[6]))
24631 std::swap (operands
[5], operands
[6]);
24632 if (REGNO (operands
[4]) > REGNO (operands
[5]))
24633 std::swap (operands
[4], operands
[5]);
24635 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
24636 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
24640 gcc_unreachable ();
24646 /* Output a call-via instruction for thumb state. */
24648 thumb_call_via_reg (rtx reg
)
24650 int regno
= REGNO (reg
);
24653 gcc_assert (regno
< LR_REGNUM
);
24655 /* If we are in the normal text section we can use a single instance
24656 per compilation unit. If we are doing function sections, then we need
24657 an entry per section, since we can't rely on reachability. */
24658 if (in_section
== text_section
)
24660 thumb_call_reg_needed
= 1;
24662 if (thumb_call_via_label
[regno
] == NULL
)
24663 thumb_call_via_label
[regno
] = gen_label_rtx ();
24664 labelp
= thumb_call_via_label
+ regno
;
24668 if (cfun
->machine
->call_via
[regno
] == NULL
)
24669 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
24670 labelp
= cfun
->machine
->call_via
+ regno
;
24673 output_asm_insn ("bl\t%a0", labelp
);
24677 /* Routines for generating rtl. */
24679 thumb_expand_movmemqi (rtx
*operands
)
24681 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
24682 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
24683 HOST_WIDE_INT len
= INTVAL (operands
[2]);
24684 HOST_WIDE_INT offset
= 0;
24688 emit_insn (gen_movmem12b (out
, in
, out
, in
));
24694 emit_insn (gen_movmem8b (out
, in
, out
, in
));
24700 rtx reg
= gen_reg_rtx (SImode
);
24701 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
24702 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
24709 rtx reg
= gen_reg_rtx (HImode
);
24710 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
24711 plus_constant (Pmode
, in
,
24713 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
24722 rtx reg
= gen_reg_rtx (QImode
);
24723 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
24724 plus_constant (Pmode
, in
,
24726 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
24733 thumb_reload_out_hi (rtx
*operands
)
24735 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
24738 /* Return the length of a function name prefix
24739 that starts with the character 'c'. */
24741 arm_get_strip_length (int c
)
24745 ARM_NAME_ENCODING_LENGTHS
24750 /* Return a pointer to a function's name with any
24751 and all prefix encodings stripped from it. */
24753 arm_strip_name_encoding (const char *name
)
24757 while ((skip
= arm_get_strip_length (* name
)))
24763 /* If there is a '*' anywhere in the name's prefix, then
24764 emit the stripped name verbatim, otherwise prepend an
24765 underscore if leading underscores are being used. */
24767 arm_asm_output_labelref (FILE *stream
, const char *name
)
24772 while ((skip
= arm_get_strip_length (* name
)))
24774 verbatim
|= (*name
== '*');
24779 fputs (name
, stream
);
24781 asm_fprintf (stream
, "%U%s", name
);
24784 /* This function is used to emit an EABI tag and its associated value.
24785 We emit the numerical value of the tag in case the assembler does not
24786 support textual tags. (Eg gas prior to 2.20). If requested we include
24787 the tag name in a comment so that anyone reading the assembler output
24788 will know which tag is being set.
24790 This function is not static because arm-c.c needs it too. */
24793 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
24795 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
24796 if (flag_verbose_asm
|| flag_debug_asm
)
24797 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
24798 asm_fprintf (asm_out_file
, "\n");
24801 /* This function is used to print CPU tuning information as comment
24802 in assembler file. Pointers are not printed for now. */
24805 arm_print_tune_info (void)
24807 asm_fprintf (asm_out_file
, "\t@.tune parameters\n");
24808 asm_fprintf (asm_out_file
, "\t\t@constant_limit:\t%d\n",
24809 current_tune
->constant_limit
);
24810 asm_fprintf (asm_out_file
, "\t\t@max_insns_skipped:\t%d\n",
24811 current_tune
->max_insns_skipped
);
24812 asm_fprintf (asm_out_file
, "\t\t@prefetch.num_slots:\t%d\n",
24813 current_tune
->prefetch
.num_slots
);
24814 asm_fprintf (asm_out_file
, "\t\t@prefetch.l1_cache_size:\t%d\n",
24815 current_tune
->prefetch
.l1_cache_size
);
24816 asm_fprintf (asm_out_file
, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
24817 current_tune
->prefetch
.l1_cache_line_size
);
24818 asm_fprintf (asm_out_file
, "\t\t@prefer_constant_pool:\t%d\n",
24819 (int) current_tune
->prefer_constant_pool
);
24820 asm_fprintf (asm_out_file
, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
24821 asm_fprintf (asm_out_file
, "\t\t\t\ts&p\tcost\n");
24822 asm_fprintf (asm_out_file
, "\t\t\t\t00\t%d\n",
24823 current_tune
->branch_cost (false, false));
24824 asm_fprintf (asm_out_file
, "\t\t\t\t01\t%d\n",
24825 current_tune
->branch_cost (false, true));
24826 asm_fprintf (asm_out_file
, "\t\t\t\t10\t%d\n",
24827 current_tune
->branch_cost (true, false));
24828 asm_fprintf (asm_out_file
, "\t\t\t\t11\t%d\n",
24829 current_tune
->branch_cost (true, true));
24830 asm_fprintf (asm_out_file
, "\t\t@prefer_ldrd_strd:\t%d\n",
24831 (int) current_tune
->prefer_ldrd_strd
);
24832 asm_fprintf (asm_out_file
, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
24833 (int) current_tune
->logical_op_non_short_circuit_thumb
,
24834 (int) current_tune
->logical_op_non_short_circuit_arm
);
24835 asm_fprintf (asm_out_file
, "\t\t@prefer_neon_for_64bits:\t%d\n",
24836 (int) current_tune
->prefer_neon_for_64bits
);
24837 asm_fprintf (asm_out_file
,
24838 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
24839 (int) current_tune
->disparage_flag_setting_t16_encodings
);
24840 asm_fprintf (asm_out_file
, "\t\t@string_ops_prefer_neon:\t%d\n",
24841 (int) current_tune
->string_ops_prefer_neon
);
24842 asm_fprintf (asm_out_file
, "\t\t@max_insns_inline_memset:\t%d\n",
24843 current_tune
->max_insns_inline_memset
);
24844 asm_fprintf (asm_out_file
, "\t\t@fusible_ops:\t%u\n",
24845 current_tune
->fusible_ops
);
24846 asm_fprintf (asm_out_file
, "\t\t@sched_autopref:\t%d\n",
24847 (int) current_tune
->sched_autopref
);
24851 arm_file_start (void)
24857 if (arm_selected_arch
)
24859 /* armv7ve doesn't support any extensions. */
24860 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
24862 /* Keep backward compatability for assemblers
24863 which don't support armv7ve. */
24864 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
24865 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
24866 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
24867 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
24868 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
24872 const char* pos
= strchr (arm_selected_arch
->name
, '+');
24876 gcc_assert (strlen (arm_selected_arch
->name
)
24877 <= sizeof (buf
) / sizeof (*pos
));
24878 strncpy (buf
, arm_selected_arch
->name
,
24879 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
24880 buf
[pos
- arm_selected_arch
->name
] = '\0';
24881 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
24882 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
24885 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
24888 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
24889 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
24892 const char* truncated_name
24893 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
24894 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
24897 if (print_tune_info
)
24898 arm_print_tune_info ();
24900 if (! TARGET_SOFT_FLOAT
)
24902 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
24903 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
24905 if (TARGET_HARD_FLOAT_ABI
)
24906 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
24909 /* Some of these attributes only apply when the corresponding features
24910 are used. However we don't have any easy way of figuring this out.
24911 Conservatively record the setting that would have been used. */
24913 if (flag_rounding_math
)
24914 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
24916 if (!flag_unsafe_math_optimizations
)
24918 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
24919 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
24921 if (flag_signaling_nans
)
24922 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
24924 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
24925 flag_finite_math_only
? 1 : 3);
24927 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
24928 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
24929 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
24930 flag_short_enums
? 1 : 2);
24932 /* Tag_ABI_optimization_goals. */
24935 else if (optimize
>= 2)
24941 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
24943 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
24946 if (arm_fp16_format
)
24947 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
24948 (int) arm_fp16_format
);
24950 if (arm_lang_output_object_attributes_hook
)
24951 arm_lang_output_object_attributes_hook();
24954 default_file_start ();
24958 arm_file_end (void)
24962 if (NEED_INDICATE_EXEC_STACK
)
24963 /* Add .note.GNU-stack. */
24964 file_end_indicate_exec_stack ();
24966 if (! thumb_call_reg_needed
)
24969 switch_to_section (text_section
);
24970 asm_fprintf (asm_out_file
, "\t.code 16\n");
24971 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
24973 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
24975 rtx label
= thumb_call_via_label
[regno
];
24979 targetm
.asm_out
.internal_label (asm_out_file
, "L",
24980 CODE_LABEL_NUMBER (label
));
24981 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
24987 /* Symbols in the text segment can be accessed without indirecting via the
24988 constant pool; it may take an extra binary operation, but this is still
24989 faster than indirecting via memory. Don't do this when not optimizing,
24990 since we won't be calculating al of the offsets necessary to do this
24994 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
24996 if (optimize
> 0 && TREE_CONSTANT (decl
))
24997 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
24999 default_encode_section_info (decl
, rtl
, first
);
25001 #endif /* !ARM_PE */
25004 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
25006 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
25007 && !strcmp (prefix
, "L"))
25009 arm_ccfsm_state
= 0;
25010 arm_target_insn
= NULL
;
25012 default_internal_label (stream
, prefix
, labelno
);
25015 /* Output code to add DELTA to the first argument, and then jump
25016 to FUNCTION. Used for C++ multiple inheritance. */
25019 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
25020 HOST_WIDE_INT
, tree function
)
25022 static int thunk_label
= 0;
25025 int mi_delta
= delta
;
25026 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
25028 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
25031 mi_delta
= - mi_delta
;
25033 final_start_function (emit_barrier (), file
, 1);
25037 int labelno
= thunk_label
++;
25038 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
25039 /* Thunks are entered in arm mode when avaiable. */
25040 if (TARGET_THUMB1_ONLY
)
25042 /* push r3 so we can use it as a temporary. */
25043 /* TODO: Omit this save if r3 is not used. */
25044 fputs ("\tpush {r3}\n", file
);
25045 fputs ("\tldr\tr3, ", file
);
25049 fputs ("\tldr\tr12, ", file
);
25051 assemble_name (file
, label
);
25052 fputc ('\n', file
);
25055 /* If we are generating PIC, the ldr instruction below loads
25056 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25057 the address of the add + 8, so we have:
25059 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25062 Note that we have "+ 1" because some versions of GNU ld
25063 don't set the low bit of the result for R_ARM_REL32
25064 relocations against thumb function symbols.
25065 On ARMv6M this is +4, not +8. */
25066 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
25067 assemble_name (file
, labelpc
);
25068 fputs (":\n", file
);
25069 if (TARGET_THUMB1_ONLY
)
25071 /* This is 2 insns after the start of the thunk, so we know it
25072 is 4-byte aligned. */
25073 fputs ("\tadd\tr3, pc, r3\n", file
);
25074 fputs ("\tmov r12, r3\n", file
);
25077 fputs ("\tadd\tr12, pc, r12\n", file
);
25079 else if (TARGET_THUMB1_ONLY
)
25080 fputs ("\tmov r12, r3\n", file
);
25082 if (TARGET_THUMB1_ONLY
)
25084 if (mi_delta
> 255)
25086 fputs ("\tldr\tr3, ", file
);
25087 assemble_name (file
, label
);
25088 fputs ("+4\n", file
);
25089 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
25090 mi_op
, this_regno
, this_regno
);
25092 else if (mi_delta
!= 0)
25094 /* Thumb1 unified syntax requires s suffix in instruction name when
25095 one of the operands is immediate. */
25096 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
25097 mi_op
, this_regno
, this_regno
,
25103 /* TODO: Use movw/movt for large constants when available. */
25104 while (mi_delta
!= 0)
25106 if ((mi_delta
& (3 << shift
)) == 0)
25110 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
25111 mi_op
, this_regno
, this_regno
,
25112 mi_delta
& (0xff << shift
));
25113 mi_delta
&= ~(0xff << shift
);
25120 if (TARGET_THUMB1_ONLY
)
25121 fputs ("\tpop\t{r3}\n", file
);
25123 fprintf (file
, "\tbx\tr12\n");
25124 ASM_OUTPUT_ALIGN (file
, 2);
25125 assemble_name (file
, label
);
25126 fputs (":\n", file
);
25129 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25130 rtx tem
= XEXP (DECL_RTL (function
), 0);
25131 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25132 pipeline offset is four rather than eight. Adjust the offset
25134 tem
= plus_constant (GET_MODE (tem
), tem
,
25135 TARGET_THUMB1_ONLY
? -3 : -7);
25136 tem
= gen_rtx_MINUS (GET_MODE (tem
),
25138 gen_rtx_SYMBOL_REF (Pmode
,
25139 ggc_strdup (labelpc
)));
25140 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
25143 /* Output ".word .LTHUNKn". */
25144 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
25146 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
25147 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
25151 fputs ("\tb\t", file
);
25152 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
25153 if (NEED_PLT_RELOC
)
25154 fputs ("(PLT)", file
);
25155 fputc ('\n', file
);
25158 final_end_function ();
25161 /* MI thunk handling for TARGET_32BIT. */
25164 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
25165 HOST_WIDE_INT vcall_offset
, tree function
)
25167 /* On ARM, this_regno is R0 or R1 depending on
25168 whether the function returns an aggregate or not.
25170 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
25172 ? R1_REGNUM
: R0_REGNUM
);
25174 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
25175 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
25176 reload_completed
= 1;
25177 emit_note (NOTE_INSN_PROLOGUE_END
);
25179 /* Add DELTA to THIS_RTX. */
25181 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
25182 delta
, this_rtx
, this_rtx
, false);
25184 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
25185 if (vcall_offset
!= 0)
25187 /* Load *THIS_RTX. */
25188 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
25189 /* Compute *THIS_RTX + VCALL_OFFSET. */
25190 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
25192 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
25193 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
25194 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
25197 /* Generate a tail call to the target function. */
25198 if (!TREE_USED (function
))
25200 assemble_external (function
);
25201 TREE_USED (function
) = 1;
25203 rtx funexp
= XEXP (DECL_RTL (function
), 0);
25204 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
25205 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
25206 SIBLING_CALL_P (insn
) = 1;
25208 insn
= get_insns ();
25209 shorten_branches (insn
);
25210 final_start_function (insn
, file
, 1);
25211 final (insn
, file
, 1);
25212 final_end_function ();
25214 /* Stop pretending this is a post-reload pass. */
25215 reload_completed
= 0;
25218 /* Output code to add DELTA to the first argument, and then jump
25219 to FUNCTION. Used for C++ multiple inheritance. */
25222 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
25223 HOST_WIDE_INT vcall_offset
, tree function
)
25226 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
25228 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
25232 arm_emit_vector_const (FILE *file
, rtx x
)
25235 const char * pattern
;
25237 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
25239 switch (GET_MODE (x
))
25241 case V2SImode
: pattern
= "%08x"; break;
25242 case V4HImode
: pattern
= "%04x"; break;
25243 case V8QImode
: pattern
= "%02x"; break;
25244 default: gcc_unreachable ();
25247 fprintf (file
, "0x");
25248 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
25252 element
= CONST_VECTOR_ELT (x
, i
);
25253 fprintf (file
, pattern
, INTVAL (element
));
25259 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25260 HFmode constant pool entries are actually loaded with ldr. */
25262 arm_emit_fp16_const (rtx c
)
25266 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
25267 if (WORDS_BIG_ENDIAN
)
25268 assemble_zeros (2);
25269 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
25270 if (!WORDS_BIG_ENDIAN
)
25271 assemble_zeros (2);
25275 arm_output_load_gr (rtx
*operands
)
25282 if (!MEM_P (operands
[1])
25283 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
25284 || !REG_P (reg
= XEXP (sum
, 0))
25285 || !CONST_INT_P (offset
= XEXP (sum
, 1))
25286 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
25287 return "wldrw%?\t%0, %1";
25289 /* Fix up an out-of-range load of a GR register. */
25290 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
25291 wcgr
= operands
[0];
25293 output_asm_insn ("ldr%?\t%0, %1", operands
);
25295 operands
[0] = wcgr
;
25297 output_asm_insn ("tmcr%?\t%0, %1", operands
);
25298 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
25303 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25305 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25306 named arg and all anonymous args onto the stack.
25307 XXX I know the prologue shouldn't be pushing registers, but it is faster
25311 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
25315 int second_time ATTRIBUTE_UNUSED
)
25317 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
25320 cfun
->machine
->uses_anonymous_args
= 1;
25321 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
25323 nregs
= pcum
->aapcs_ncrn
;
25324 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
25328 nregs
= pcum
->nregs
;
25330 if (nregs
< NUM_ARG_REGS
)
25331 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
25334 /* We can't rely on the caller doing the proper promotion when
25335 using APCS or ATPCS. */
25338 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
25340 return !TARGET_AAPCS_BASED
;
25343 static machine_mode
25344 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
25346 int *punsignedp ATTRIBUTE_UNUSED
,
25347 const_tree fntype ATTRIBUTE_UNUSED
,
25348 int for_return ATTRIBUTE_UNUSED
)
25350 if (GET_MODE_CLASS (mode
) == MODE_INT
25351 && GET_MODE_SIZE (mode
) < 4)
25357 /* AAPCS based ABIs use short enums by default. */
25360 arm_default_short_enums (void)
25362 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
25366 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25369 arm_align_anon_bitfield (void)
25371 return TARGET_AAPCS_BASED
;
25375 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25378 arm_cxx_guard_type (void)
25380 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
25384 /* The EABI says test the least significant bit of a guard variable. */
25387 arm_cxx_guard_mask_bit (void)
25389 return TARGET_AAPCS_BASED
;
25393 /* The EABI specifies that all array cookies are 8 bytes long. */
25396 arm_get_cookie_size (tree type
)
25400 if (!TARGET_AAPCS_BASED
)
25401 return default_cxx_get_cookie_size (type
);
25403 size
= build_int_cst (sizetype
, 8);
25408 /* The EABI says that array cookies should also contain the element size. */
25411 arm_cookie_has_size (void)
25413 return TARGET_AAPCS_BASED
;
25417 /* The EABI says constructors and destructors should return a pointer to
25418 the object constructed/destroyed. */
25421 arm_cxx_cdtor_returns_this (void)
25423 return TARGET_AAPCS_BASED
;
25426 /* The EABI says that an inline function may never be the key
25430 arm_cxx_key_method_may_be_inline (void)
25432 return !TARGET_AAPCS_BASED
;
25436 arm_cxx_determine_class_data_visibility (tree decl
)
25438 if (!TARGET_AAPCS_BASED
25439 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
25442 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
25443 is exported. However, on systems without dynamic vague linkage,
25444 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
25445 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
25446 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
25448 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
25449 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
25453 arm_cxx_class_data_always_comdat (void)
25455 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
25456 vague linkage if the class has no key function. */
25457 return !TARGET_AAPCS_BASED
;
25461 /* The EABI says __aeabi_atexit should be used to register static
25465 arm_cxx_use_aeabi_atexit (void)
25467 return TARGET_AAPCS_BASED
;
25472 arm_set_return_address (rtx source
, rtx scratch
)
25474 arm_stack_offsets
*offsets
;
25475 HOST_WIDE_INT delta
;
25477 unsigned long saved_regs
;
25479 offsets
= arm_get_frame_offsets ();
25480 saved_regs
= offsets
->saved_regs_mask
;
25482 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
25483 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
25486 if (frame_pointer_needed
)
25487 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
25490 /* LR will be the first saved register. */
25491 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
25496 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
25497 GEN_INT (delta
& ~4095)));
25502 addr
= stack_pointer_rtx
;
25504 addr
= plus_constant (Pmode
, addr
, delta
);
25506 /* The store needs to be marked as frame related in order to prevent
25507 DSE from deleting it as dead if it is based on fp. */
25508 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
25509 RTX_FRAME_RELATED_P (insn
) = 1;
25510 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
25516 thumb_set_return_address (rtx source
, rtx scratch
)
25518 arm_stack_offsets
*offsets
;
25519 HOST_WIDE_INT delta
;
25520 HOST_WIDE_INT limit
;
25523 unsigned long mask
;
25527 offsets
= arm_get_frame_offsets ();
25528 mask
= offsets
->saved_regs_mask
;
25529 if (mask
& (1 << LR_REGNUM
))
25532 /* Find the saved regs. */
25533 if (frame_pointer_needed
)
25535 delta
= offsets
->soft_frame
- offsets
->saved_args
;
25536 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
25542 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
25545 /* Allow for the stack frame. */
25546 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
25548 /* The link register is always the first saved register. */
25551 /* Construct the address. */
25552 addr
= gen_rtx_REG (SImode
, reg
);
25555 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
25556 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
25560 addr
= plus_constant (Pmode
, addr
, delta
);
25562 /* The store needs to be marked as frame related in order to prevent
25563 DSE from deleting it as dead if it is based on fp. */
25564 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
25565 RTX_FRAME_RELATED_P (insn
) = 1;
25566 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
25569 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
25572 /* Implements target hook vector_mode_supported_p. */
25574 arm_vector_mode_supported_p (machine_mode mode
)
25576 /* Neon also supports V2SImode, etc. listed in the clause below. */
25577 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
25578 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
25579 || mode
== V2DImode
|| mode
== V8HFmode
))
25582 if ((TARGET_NEON
|| TARGET_IWMMXT
)
25583 && ((mode
== V2SImode
)
25584 || (mode
== V4HImode
)
25585 || (mode
== V8QImode
)))
25588 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
25589 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
25590 || mode
== V2HAmode
))
25596 /* Implements target hook array_mode_supported_p. */
25599 arm_array_mode_supported_p (machine_mode mode
,
25600 unsigned HOST_WIDE_INT nelems
)
25603 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
25604 && (nelems
>= 2 && nelems
<= 4))
25610 /* Use the option -mvectorize-with-neon-double to override the use of quardword
25611 registers when autovectorizing for Neon, at least until multiple vector
25612 widths are supported properly by the middle-end. */
25614 static machine_mode
25615 arm_preferred_simd_mode (machine_mode mode
)
25621 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
25623 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
25625 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
25627 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
25629 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
25636 if (TARGET_REALLY_IWMMXT
)
25652 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
25654 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
25655 using r0-r4 for function arguments, r7 for the stack frame and don't have
25656 enough left over to do doubleword arithmetic. For Thumb-2 all the
25657 potentially problematic instructions accept high registers so this is not
25658 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
25659 that require many low registers. */
25661 arm_class_likely_spilled_p (reg_class_t rclass
)
25663 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
25664 || rclass
== CC_REG
)
25670 /* Implements target hook small_register_classes_for_mode_p. */
25672 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
25674 return TARGET_THUMB1
;
25677 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
25678 ARM insns and therefore guarantee that the shift count is modulo 256.
25679 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
25680 guarantee no particular behavior for out-of-range counts. */
25682 static unsigned HOST_WIDE_INT
25683 arm_shift_truncation_mask (machine_mode mode
)
25685 return mode
== SImode
? 255 : 0;
25689 /* Map internal gcc register numbers to DWARF2 register numbers. */
25692 arm_dbx_register_number (unsigned int regno
)
25697 if (IS_VFP_REGNUM (regno
))
25699 /* See comment in arm_dwarf_register_span. */
25700 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
25701 return 64 + regno
- FIRST_VFP_REGNUM
;
25703 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
25706 if (IS_IWMMXT_GR_REGNUM (regno
))
25707 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
25709 if (IS_IWMMXT_REGNUM (regno
))
25710 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
25712 return DWARF_FRAME_REGISTERS
;
25715 /* Dwarf models VFPv3 registers as 32 64-bit registers.
25716 GCC models tham as 64 32-bit registers, so we need to describe this to
25717 the DWARF generation code. Other registers can use the default. */
25719 arm_dwarf_register_span (rtx rtl
)
25727 regno
= REGNO (rtl
);
25728 if (!IS_VFP_REGNUM (regno
))
25731 /* XXX FIXME: The EABI defines two VFP register ranges:
25732 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
25734 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
25735 corresponding D register. Until GDB supports this, we shall use the
25736 legacy encodings. We also use these encodings for D0-D15 for
25737 compatibility with older debuggers. */
25738 mode
= GET_MODE (rtl
);
25739 if (GET_MODE_SIZE (mode
) < 8)
25742 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
25744 nregs
= GET_MODE_SIZE (mode
) / 4;
25745 for (i
= 0; i
< nregs
; i
+= 2)
25746 if (TARGET_BIG_END
)
25748 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
25749 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
25753 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
25754 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
25759 nregs
= GET_MODE_SIZE (mode
) / 8;
25760 for (i
= 0; i
< nregs
; i
++)
25761 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
25764 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
25767 #if ARM_UNWIND_INFO
25768 /* Emit unwind directives for a store-multiple instruction or stack pointer
25769 push during alignment.
25770 These should only ever be generated by the function prologue code, so
25771 expect them to have a particular form.
25772 The store-multiple instruction sometimes pushes pc as the last register,
25773 although it should not be tracked into unwind information, or for -Os
25774 sometimes pushes some dummy registers before first register that needs
25775 to be tracked in unwind information; such dummy registers are there just
25776 to avoid separate stack adjustment, and will not be restored in the
25780 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
25783 HOST_WIDE_INT offset
;
25784 HOST_WIDE_INT nregs
;
25788 unsigned padfirst
= 0, padlast
= 0;
25791 e
= XVECEXP (p
, 0, 0);
25792 gcc_assert (GET_CODE (e
) == SET
);
25794 /* First insn will adjust the stack pointer. */
25795 gcc_assert (GET_CODE (e
) == SET
25796 && REG_P (SET_DEST (e
))
25797 && REGNO (SET_DEST (e
)) == SP_REGNUM
25798 && GET_CODE (SET_SRC (e
)) == PLUS
);
25800 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
25801 nregs
= XVECLEN (p
, 0) - 1;
25802 gcc_assert (nregs
);
25804 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
25807 /* For -Os dummy registers can be pushed at the beginning to
25808 avoid separate stack pointer adjustment. */
25809 e
= XVECEXP (p
, 0, 1);
25810 e
= XEXP (SET_DEST (e
), 0);
25811 if (GET_CODE (e
) == PLUS
)
25812 padfirst
= INTVAL (XEXP (e
, 1));
25813 gcc_assert (padfirst
== 0 || optimize_size
);
25814 /* The function prologue may also push pc, but not annotate it as it is
25815 never restored. We turn this into a stack pointer adjustment. */
25816 e
= XVECEXP (p
, 0, nregs
);
25817 e
= XEXP (SET_DEST (e
), 0);
25818 if (GET_CODE (e
) == PLUS
)
25819 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
25821 padlast
= offset
- 4;
25822 gcc_assert (padlast
== 0 || padlast
== 4);
25824 fprintf (asm_out_file
, "\t.pad #4\n");
25826 fprintf (asm_out_file
, "\t.save {");
25828 else if (IS_VFP_REGNUM (reg
))
25831 fprintf (asm_out_file
, "\t.vsave {");
25834 /* Unknown register type. */
25835 gcc_unreachable ();
25837 /* If the stack increment doesn't match the size of the saved registers,
25838 something has gone horribly wrong. */
25839 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
25843 /* The remaining insns will describe the stores. */
25844 for (i
= 1; i
<= nregs
; i
++)
25846 /* Expect (set (mem <addr>) (reg)).
25847 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
25848 e
= XVECEXP (p
, 0, i
);
25849 gcc_assert (GET_CODE (e
) == SET
25850 && MEM_P (SET_DEST (e
))
25851 && REG_P (SET_SRC (e
)));
25853 reg
= REGNO (SET_SRC (e
));
25854 gcc_assert (reg
>= lastreg
);
25857 fprintf (asm_out_file
, ", ");
25858 /* We can't use %r for vfp because we need to use the
25859 double precision register names. */
25860 if (IS_VFP_REGNUM (reg
))
25861 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
25863 asm_fprintf (asm_out_file
, "%r", reg
);
25867 /* Check that the addresses are consecutive. */
25868 e
= XEXP (SET_DEST (e
), 0);
25869 if (GET_CODE (e
) == PLUS
)
25870 gcc_assert (REG_P (XEXP (e
, 0))
25871 && REGNO (XEXP (e
, 0)) == SP_REGNUM
25872 && CONST_INT_P (XEXP (e
, 1))
25873 && offset
== INTVAL (XEXP (e
, 1)));
25877 && REGNO (e
) == SP_REGNUM
);
25878 offset
+= reg_size
;
25881 fprintf (asm_out_file
, "}\n");
25883 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
25886 /* Emit unwind directives for a SET. */
25889 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
25897 switch (GET_CODE (e0
))
25900 /* Pushing a single register. */
25901 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
25902 || !REG_P (XEXP (XEXP (e0
, 0), 0))
25903 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
25906 asm_fprintf (asm_out_file
, "\t.save ");
25907 if (IS_VFP_REGNUM (REGNO (e1
)))
25908 asm_fprintf(asm_out_file
, "{d%d}\n",
25909 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
25911 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
25915 if (REGNO (e0
) == SP_REGNUM
)
25917 /* A stack increment. */
25918 if (GET_CODE (e1
) != PLUS
25919 || !REG_P (XEXP (e1
, 0))
25920 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
25921 || !CONST_INT_P (XEXP (e1
, 1)))
25924 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
25925 -INTVAL (XEXP (e1
, 1)));
25927 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
25929 HOST_WIDE_INT offset
;
25931 if (GET_CODE (e1
) == PLUS
)
25933 if (!REG_P (XEXP (e1
, 0))
25934 || !CONST_INT_P (XEXP (e1
, 1)))
25936 reg
= REGNO (XEXP (e1
, 0));
25937 offset
= INTVAL (XEXP (e1
, 1));
25938 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
25939 HARD_FRAME_POINTER_REGNUM
, reg
,
25942 else if (REG_P (e1
))
25945 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
25946 HARD_FRAME_POINTER_REGNUM
, reg
);
25951 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
25953 /* Move from sp to reg. */
25954 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
25956 else if (GET_CODE (e1
) == PLUS
25957 && REG_P (XEXP (e1
, 0))
25958 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
25959 && CONST_INT_P (XEXP (e1
, 1)))
25961 /* Set reg to offset from sp. */
25962 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
25963 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
25975 /* Emit unwind directives for the given insn. */
25978 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
25981 bool handled_one
= false;
25983 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
25986 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
25987 && (TREE_NOTHROW (current_function_decl
)
25988 || crtl
->all_throwers_are_sibcalls
))
25991 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
25994 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
25996 switch (REG_NOTE_KIND (note
))
25998 case REG_FRAME_RELATED_EXPR
:
25999 pat
= XEXP (note
, 0);
26002 case REG_CFA_REGISTER
:
26003 pat
= XEXP (note
, 0);
26006 pat
= PATTERN (insn
);
26007 if (GET_CODE (pat
) == PARALLEL
)
26008 pat
= XVECEXP (pat
, 0, 0);
26011 /* Only emitted for IS_STACKALIGN re-alignment. */
26016 src
= SET_SRC (pat
);
26017 dest
= SET_DEST (pat
);
26019 gcc_assert (src
== stack_pointer_rtx
);
26020 reg
= REGNO (dest
);
26021 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26024 handled_one
= true;
26027 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26028 to get correct dwarf information for shrink-wrap. We should not
26029 emit unwind information for it because these are used either for
26030 pretend arguments or notes to adjust sp and restore registers from
26032 case REG_CFA_DEF_CFA
:
26033 case REG_CFA_ADJUST_CFA
:
26034 case REG_CFA_RESTORE
:
26037 case REG_CFA_EXPRESSION
:
26038 case REG_CFA_OFFSET
:
26039 /* ??? Only handling here what we actually emit. */
26040 gcc_unreachable ();
26048 pat
= PATTERN (insn
);
26051 switch (GET_CODE (pat
))
26054 arm_unwind_emit_set (asm_out_file
, pat
);
26058 /* Store multiple. */
26059 arm_unwind_emit_sequence (asm_out_file
, pat
);
26068 /* Output a reference from a function exception table to the type_info
26069 object X. The EABI specifies that the symbol should be relocated by
26070 an R_ARM_TARGET2 relocation. */
26073 arm_output_ttype (rtx x
)
26075 fputs ("\t.word\t", asm_out_file
);
26076 output_addr_const (asm_out_file
, x
);
26077 /* Use special relocations for symbol references. */
26078 if (!CONST_INT_P (x
))
26079 fputs ("(TARGET2)", asm_out_file
);
26080 fputc ('\n', asm_out_file
);
26085 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26088 arm_asm_emit_except_personality (rtx personality
)
26090 fputs ("\t.personality\t", asm_out_file
);
26091 output_addr_const (asm_out_file
, personality
);
26092 fputc ('\n', asm_out_file
);
26094 #endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  /* NOTE(review): the section directive string was lost in extraction;
     "\t.handlerdata" matches upstream GCC -- confirm.  */
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
26112 /* Output unwind directives for the start/end of a function. */
26115 arm_output_fn_unwind (FILE * f
, bool prologue
)
26117 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
26121 fputs ("\t.fnstart\n", f
);
26124 /* If this function will never be unwound, then mark it as such.
26125 The came condition is used in arm_unwind_emit to suppress
26126 the frame annotations. */
26127 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
26128 && (TREE_NOTHROW (current_function_decl
)
26129 || crtl
->all_throwers_are_sibcalls
))
26130 fputs("\t.cantunwind\n", f
);
26132 fputs ("\t.fnend\n", f
);
26137 arm_emit_tls_decoration (FILE *fp
, rtx x
)
26139 enum tls_reloc reloc
;
26142 val
= XVECEXP (x
, 0, 0);
26143 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
26145 output_addr_const (fp
, val
);
26150 fputs ("(tlsgd)", fp
);
26153 fputs ("(tlsldm)", fp
);
26156 fputs ("(tlsldo)", fp
);
26159 fputs ("(gottpoff)", fp
);
26162 fputs ("(tpoff)", fp
);
26165 fputs ("(tlsdesc)", fp
);
26168 gcc_unreachable ();
26177 fputs (" + (. - ", fp
);
26178 output_addr_const (fp
, XVECEXP (x
, 0, 2));
26179 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26180 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
26181 output_addr_const (fp
, XVECEXP (x
, 0, 3));
26191 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26194 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
26196 gcc_assert (size
== 4);
26197 fputs ("\t.word\t", file
);
26198 output_addr_const (file
, x
);
26199 fputs ("(tlsldo)", file
);
26202 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26205 arm_output_addr_const_extra (FILE *fp
, rtx x
)
26207 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
26208 return arm_emit_tls_decoration (fp
, x
);
26209 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
26212 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
26214 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
26215 assemble_name_raw (fp
, label
);
26219 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
26221 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
26225 output_addr_const (fp
, XVECEXP (x
, 0, 0));
26229 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
26231 output_addr_const (fp
, XVECEXP (x
, 0, 0));
26235 output_addr_const (fp
, XVECEXP (x
, 0, 1));
26239 else if (GET_CODE (x
) == CONST_VECTOR
)
26240 return arm_emit_vector_const (fp
, x
);
26245 /* Output assembly for a shift instruction.
26246 SET_FLAGS determines how the instruction modifies the condition codes.
26247 0 - Do not set condition codes.
26248 1 - Set condition codes.
26249 2 - Use smallest instruction. */
26251 arm_output_shift(rtx
* operands
, int set_flags
)
26254 static const char flag_chars
[3] = {'?', '.', '!'};
26259 c
= flag_chars
[set_flags
];
26260 shift
= shift_op(operands
[3], &val
);
26264 operands
[2] = GEN_INT(val
);
26265 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
26268 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
26270 output_asm_insn (pattern
, operands
);
26274 /* Output assembly for a WMMX immediate shift instruction. */
26276 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
26278 int shift
= INTVAL (operands
[2]);
26280 machine_mode opmode
= GET_MODE (operands
[0]);
26282 gcc_assert (shift
>= 0);
26284 /* If the shift value in the register versions is > 63 (for D qualifier),
26285 31 (for W qualifier) or 15 (for H qualifier). */
26286 if (((opmode
== V4HImode
) && (shift
> 15))
26287 || ((opmode
== V2SImode
) && (shift
> 31))
26288 || ((opmode
== DImode
) && (shift
> 63)))
26292 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
26293 output_asm_insn (templ
, operands
);
26294 if (opmode
== DImode
)
26296 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
26297 output_asm_insn (templ
, operands
);
26302 /* The destination register will contain all zeros. */
26303 sprintf (templ
, "wzero\t%%0");
26304 output_asm_insn (templ
, operands
);
26309 if ((opmode
== DImode
) && (shift
> 32))
26311 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
26312 output_asm_insn (templ
, operands
);
26313 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
26314 output_asm_insn (templ
, operands
);
26318 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
26319 output_asm_insn (templ
, operands
);
26324 /* Output assembly for a WMMX tinsr instruction. */
26326 arm_output_iwmmxt_tinsr (rtx
*operands
)
26328 int mask
= INTVAL (operands
[3]);
26331 int units
= mode_nunits
[GET_MODE (operands
[0])];
26332 gcc_assert ((mask
& (mask
- 1)) == 0);
26333 for (i
= 0; i
< units
; ++i
)
26335 if ((mask
& 0x01) == 1)
26341 gcc_assert (i
< units
);
26343 switch (GET_MODE (operands
[0]))
26346 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
26349 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
26352 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
26355 gcc_unreachable ();
26358 output_asm_insn (templ
, operands
);
26363 /* Output a Thumb-1 casesi dispatch sequence. */
26365 thumb1_output_casesi (rtx
*operands
)
26367 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
26369 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
26371 switch (GET_MODE(diff_vec
))
26374 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
26375 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26377 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
26378 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26380 return "bl\t%___gnu_thumb1_case_si";
26382 gcc_unreachable ();
26386 /* Output a Thumb-2 casesi instruction. */
26388 thumb2_output_casesi (rtx
*operands
)
26390 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
26392 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
26394 output_asm_insn ("cmp\t%0, %1", operands
);
26395 output_asm_insn ("bhi\t%l3", operands
);
26396 switch (GET_MODE(diff_vec
))
26399 return "tbb\t[%|pc, %0]";
26401 return "tbh\t[%|pc, %0, lsl #1]";
26405 output_asm_insn ("adr\t%4, %l2", operands
);
26406 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
26407 output_asm_insn ("add\t%4, %4, %5", operands
);
26412 output_asm_insn ("adr\t%4, %l2", operands
);
26413 return "ldr\t%|pc, [%4, %0, lsl #2]";
26416 gcc_unreachable ();
26420 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
26421 per-core tuning structs. */
26423 arm_issue_rate (void)
26425 return current_tune
->issue_rate
;
26428 /* Return how many instructions should scheduler lookahead to choose the
26431 arm_first_cycle_multipass_dfa_lookahead (void)
26433 int issue_rate
= arm_issue_rate ();
26435 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
26438 /* Enable modeling of L2 auto-prefetcher. */
26440 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
26442 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
26446 arm_mangle_type (const_tree type
)
26448 /* The ARM ABI documents (10th October 2008) say that "__va_list"
26449 has to be managled as if it is in the "std" namespace. */
26450 if (TARGET_AAPCS_BASED
26451 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
26452 return "St9__va_list";
26454 /* Half-precision float. */
26455 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
26458 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
26460 if (TYPE_NAME (type
) != NULL
)
26461 return arm_mangle_builtin_type (type
);
26463 /* Use the default mangling. */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};
26479 /* Adjust register allocation order when compiling for Thumb. */
26482 arm_order_regs_for_local_alloc (void)
26484 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
26485 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
26487 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
26488 sizeof (thumb_core_reg_alloc_order
));
26491 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
26494 arm_frame_pointer_required (void)
26496 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
26499 /* If the function receives nonlocal gotos, it needs to save the frame
26500 pointer in the nonlocal_goto_save_area object. */
26501 if (cfun
->has_nonlocal_label
)
26504 /* The frame pointer is required for non-leaf APCS frames. */
26505 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !leaf_function_p ())
26508 /* If we are probing the stack in the prologue, we will have a faulting
26509 instruction prior to the stack adjustment and this requires a frame
26510 pointer if we want to catch the exception using the EABI unwinder. */
26511 if (!IS_INTERRUPT (arm_current_func_type ())
26512 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
26513 && arm_except_unwind_info (&global_options
) == UI_TARGET
26514 && cfun
->can_throw_non_call_exceptions
)
26516 HOST_WIDE_INT size
= get_frame_size ();
26518 /* That's irrelevant if there is no stack adjustment. */
26522 /* That's relevant only if there is a stack probe. */
26523 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
26525 /* We don't have the final size of the frame so adjust. */
26526 size
+= 32 * UNITS_PER_WORD
;
26527 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
26537 /* Only thumb1 can't support conditional execution, so return true if
26538 the target is not thumb1. */
26540 arm_have_conditional_execution (void)
26542 return !TARGET_THUMB1
;
26545 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
26546 static HOST_WIDE_INT
26547 arm_vector_alignment (const_tree type
)
26549 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
26551 if (TARGET_AAPCS_BASED
)
26552 align
= MIN (align
, 64);
26557 static unsigned int
26558 arm_autovectorize_vector_sizes (void)
26560 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
26564 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
26566 /* Vectors which aren't in packed structures will not be less aligned than
26567 the natural alignment of their element type, so this is safe. */
26568 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
26571 return default_builtin_vector_alignment_reachable (type
, is_packed
);
26575 arm_builtin_support_vector_misalignment (machine_mode mode
,
26576 const_tree type
, int misalignment
,
26579 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
26581 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
26586 /* If the misalignment is unknown, we should be able to handle the access
26587 so long as it is not to a member of a packed data structure. */
26588 if (misalignment
== -1)
26591 /* Return true if the misalignment is a multiple of the natural alignment
26592 of the vector's element type. This is probably always going to be
26593 true in practice, since we've already established that this isn't a
26595 return ((misalignment
% align
) == 0);
26598 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
26603 arm_conditional_register_usage (void)
26607 if (TARGET_THUMB1
&& optimize_size
)
26609 /* When optimizing for size on Thumb-1, it's better not
26610 to use the HI regs, because of the overhead of
26612 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
26613 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
26616 /* The link register can be clobbered by any branch insn,
26617 but we have no way to track that at present, so mark
26618 it as unavailable. */
26620 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
26622 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
26624 /* VFPv3 registers are disabled when earlier VFP
26625 versions are selected due to the definition of
26626 LAST_VFP_REGNUM. */
26627 for (regno
= FIRST_VFP_REGNUM
;
26628 regno
<= LAST_VFP_REGNUM
; ++ regno
)
26630 fixed_regs
[regno
] = 0;
26631 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
26632 || regno
>= FIRST_VFP_REGNUM
+ 32;
26636 if (TARGET_REALLY_IWMMXT
)
26638 regno
= FIRST_IWMMXT_GR_REGNUM
;
26639 /* The 2002/10/09 revision of the XScale ABI has wCG0
26640 and wCG1 as call-preserved registers. The 2002/11/21
26641 revision changed this so that all wCG registers are
26642 scratch registers. */
26643 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
26644 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
26645 fixed_regs
[regno
] = 0;
26646 /* The XScale ABI has wR0 - wR9 as scratch registers,
26647 the rest as call-preserved registers. */
26648 for (regno
= FIRST_IWMMXT_REGNUM
;
26649 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
26651 fixed_regs
[regno
] = 0;
26652 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
26656 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
26658 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
26659 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
26661 else if (TARGET_APCS_STACK
)
26663 fixed_regs
[10] = 1;
26664 call_used_regs
[10] = 1;
26666 /* -mcaller-super-interworking reserves r11 for calls to
26667 _interwork_r11_call_via_rN(). Making the register global
26668 is an easy way of ensuring that it remains valid for all
26670 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
26671 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
26673 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
26674 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
26675 if (TARGET_CALLER_INTERWORKING
)
26676 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
26678 SUBTARGET_CONDITIONAL_REGISTER_USAGE
26682 arm_preferred_rename_class (reg_class_t rclass
)
26684 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
26685 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
26686 and code size can be reduced. */
26687 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
26693 /* Compute the attribute "length" of insn "*push_multi".
26694 So this function MUST be kept in sync with that insn pattern. */
26696 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
26698 int i
, regno
, hi_reg
;
26699 int num_saves
= XVECLEN (parallel_op
, 0);
26709 regno
= REGNO (first_op
);
26710 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
26711 list is 8-bit. Normally this means all registers in the list must be
26712 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
26713 encodings. There is one exception for PUSH that LR in HI_REGS can be used
26714 with 16-bit encoding. */
26715 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
26716 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
26718 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
26719 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
26727 /* Compute the attribute "length" of insn. Currently, this function is used
26728 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
26729 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
26730 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
26731 true if OPERANDS contains insn which explicit updates base register. */
26734 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
26743 rtx parallel_op
= operands
[0];
26744 /* Initialize to elements number of PARALLEL. */
26745 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
26746 /* Initialize the value to base register. */
26747 unsigned regno
= REGNO (operands
[1]);
26748 /* Skip return and write back pattern.
26749 We only need register pop pattern for later analysis. */
26750 unsigned first_indx
= 0;
26751 first_indx
+= return_pc
? 1 : 0;
26752 first_indx
+= write_back_p
? 1 : 0;
26754 /* A pop operation can be done through LDM or POP. If the base register is SP
26755 and if it's with write back, then a LDM will be alias of POP. */
26756 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
26757 bool ldm_p
= !pop_p
;
26759 /* Check base register for LDM. */
26760 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
26763 /* Check each register in the list. */
26764 for (; indx
>= first_indx
; indx
--)
26766 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
26767 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
26768 comment in arm_attr_length_push_multi. */
26769 if (REGNO_REG_CLASS (regno
) == HI_REGS
26770 && (regno
!= PC_REGNUM
|| ldm_p
))
26777 /* Compute the number of instructions emitted by output_move_double. */
26779 arm_count_output_move_double_insns (rtx
*operands
)
26783 /* output_move_double may modify the operands array, so call it
26784 here on a copy of the array. */
26785 ops
[0] = operands
[0];
26786 ops
[1] = operands
[1];
26787 output_move_double (ops
, false, &count
);
26792 vfp3_const_double_for_fract_bits (rtx operand
)
26794 REAL_VALUE_TYPE r0
;
26796 if (!CONST_DOUBLE_P (operand
))
26799 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
26800 if (exact_real_inverse (DFmode
, &r0
)
26801 && !REAL_VALUE_NEGATIVE (r0
))
26803 if (exact_real_truncate (DFmode
, &r0
))
26805 HOST_WIDE_INT value
= real_to_integer (&r0
);
26806 value
= value
& 0xffffffff;
26807 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
26809 int ret
= exact_log2 (value
);
26810 gcc_assert (IN_RANGE (ret
, 0, 31));
26818 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
26819 log2 is in [1, 32], return that log2. Otherwise return -1.
26820 This is used in the patterns for vcvt.s32.f32 floating-point to
26821 fixed-point conversions. */
26824 vfp3_const_double_for_bits (rtx x
)
26826 const REAL_VALUE_TYPE
*r
;
26828 if (!CONST_DOUBLE_P (x
))
26831 r
= CONST_DOUBLE_REAL_VALUE (x
);
26833 if (REAL_VALUE_NEGATIVE (*r
)
26834 || REAL_VALUE_ISNAN (*r
)
26835 || REAL_VALUE_ISINF (*r
)
26836 || !real_isinteger (r
, SFmode
))
26839 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
26841 /* The exact_log2 above will have returned -1 if this is
26842 not an exact log2. */
26843 if (!IN_RANGE (hwint
, 1, 32))
26850 /* Emit a memory barrier around an atomic sequence according to MODEL. */
26853 arm_pre_atomic_barrier (enum memmodel model
)
26855 if (need_atomic_barrier_p (model
, true))
26856 emit_insn (gen_memory_barrier ());
26860 arm_post_atomic_barrier (enum memmodel model
)
26862 if (need_atomic_barrier_p (model
, false))
26863 emit_insn (gen_memory_barrier ());
26866 /* Emit the load-exclusive and store-exclusive instructions.
26867 Use acquire and release versions if necessary. */
26870 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
26872 rtx (*gen
) (rtx
, rtx
);
26878 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
26879 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
26880 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
26881 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
26883 gcc_unreachable ();
26890 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
26891 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
26892 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
26893 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
26895 gcc_unreachable ();
26899 emit_insn (gen (rval
, mem
));
26903 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
26906 rtx (*gen
) (rtx
, rtx
, rtx
);
26912 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
26913 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
26914 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
26915 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
26917 gcc_unreachable ();
26924 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
26925 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
26926 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
26927 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
26929 gcc_unreachable ();
26933 emit_insn (gen (bval
, rval
, mem
));
26936 /* Mark the previous jump instruction as unlikely. */
26939 emit_unlikely_jump (rtx insn
)
26941 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
26943 insn
= emit_jump_insn (insn
);
26944 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
26947 /* Expand a compare and swap pattern. */
26950 arm_expand_compare_and_swap (rtx operands
[])
26952 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
26954 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
26956 bval
= operands
[0];
26957 rval
= operands
[1];
26959 oldval
= operands
[3];
26960 newval
= operands
[4];
26961 is_weak
= operands
[5];
26962 mod_s
= operands
[6];
26963 mod_f
= operands
[7];
26964 mode
= GET_MODE (mem
);
26966 /* Normally the succ memory model must be stronger than fail, but in the
26967 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
26968 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
26970 if (TARGET_HAVE_LDACQ
26971 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
26972 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
26973 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
26979 /* For narrow modes, we're going to perform the comparison in SImode,
26980 so do the zero-extension now. */
26981 rval
= gen_reg_rtx (SImode
);
26982 oldval
= convert_modes (SImode
, mode
, oldval
, true);
26986 /* Force the value into a register if needed. We waited until after
26987 the zero-extension above to do this properly. */
26988 if (!arm_add_operand (oldval
, SImode
))
26989 oldval
= force_reg (SImode
, oldval
);
26993 if (!cmpdi_operand (oldval
, mode
))
26994 oldval
= force_reg (mode
, oldval
);
26998 gcc_unreachable ();
27003 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
27004 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
27005 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
27006 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
27008 gcc_unreachable ();
27011 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CCmode
, CC_REGNUM
);
27012 emit_insn (gen (bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
27014 if (mode
== QImode
|| mode
== HImode
)
27015 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
27017 /* In all cases, we arrange for success to be signaled by Z set.
27018 This arrangement allows for the boolean result to be used directly
27019 in a subsequent branch, post optimization. For Thumb-1 targets, the
27020 boolean negation of the result is also stored in bval because Thumb-1
27021 backend lacks dependency tracking for CC flag due to flag-setting not
27022 being represented at RTL level. */
27024 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
27027 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
27028 emit_insn (gen_rtx_SET (bval
, x
));
27032 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27033 another memory store between the load-exclusive and store-exclusive can
27034 reset the monitor from Exclusive to Open state. This means we must wait
27035 until after reload to split the pattern, lest we get a register spill in
27036 the middle of the atomic sequence. Success of the compare and swap is
27037 indicated by the Z flag set for 32bit targets and by neg_bval being zero
27038 for Thumb-1 targets (ie. negation of the boolean value returned by
27039 atomic_compare_and_swapmode standard pattern in operand 0). */
27042 arm_split_compare_and_swap (rtx operands
[])
27044 rtx rval
, mem
, oldval
, newval
, neg_bval
;
27046 enum memmodel mod_s
, mod_f
;
27048 rtx_code_label
*label1
, *label2
;
27051 rval
= operands
[1];
27053 oldval
= operands
[3];
27054 newval
= operands
[4];
27055 is_weak
= (operands
[5] != const0_rtx
);
27056 mod_s
= memmodel_from_int (INTVAL (operands
[6]));
27057 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
27058 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
27059 mode
= GET_MODE (mem
);
27061 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
27063 bool use_acquire
= TARGET_HAVE_LDACQ
27064 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
27065 || is_mm_release (mod_s
));
27067 bool use_release
= TARGET_HAVE_LDACQ
27068 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
27069 || is_mm_acquire (mod_s
));
27071 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27072 a full barrier is emitted after the store-release. */
27074 use_acquire
= false;
27076 /* Checks whether a barrier is needed and emits one accordingly. */
27077 if (!(use_acquire
|| use_release
))
27078 arm_pre_atomic_barrier (mod_s
);
27083 label1
= gen_label_rtx ();
27084 emit_label (label1
);
27086 label2
= gen_label_rtx ();
27088 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
27090 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
27091 as required to communicate with arm_expand_compare_and_swap. */
27094 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
27095 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27096 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
27097 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
27098 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
27102 emit_move_insn (neg_bval
, const1_rtx
);
27103 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
27104 if (thumb1_cmpneg_operand (oldval
, SImode
))
27105 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
27108 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
27111 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
27113 /* Weak or strong, we want EQ to be true for success, so that we
27114 match the flags that we got from the compare above. */
27117 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
27118 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
27119 emit_insn (gen_rtx_SET (cond
, x
));
27124 /* Z is set to boolean value of !neg_bval, as required to communicate
27125 with arm_expand_compare_and_swap. */
27126 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
27127 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
27130 if (!is_mm_relaxed (mod_f
))
27131 emit_label (label2
);
27133 /* Checks whether a barrier is needed and emits one accordingly. */
27135 || !(use_acquire
|| use_release
))
27136 arm_post_atomic_barrier (mod_s
);
27138 if (is_mm_relaxed (mod_f
))
27139 emit_label (label2
);
27142 /* Split an atomic operation pattern. Operation is given by CODE and is one
27143 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
27144 operation). Operation is performed on the content at MEM and on VALUE
27145 following the memory model MODEL_RTX. The content at MEM before and after
27146 the operation is returned in OLD_OUT and NEW_OUT respectively while the
27147 success of the operation is returned in COND. Using a scratch register or
27148 an operand register for these determines what result is returned for that
27152 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
27153 rtx value
, rtx model_rtx
, rtx cond
)
27155 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
27156 machine_mode mode
= GET_MODE (mem
);
27157 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
27158 rtx_code_label
*label
;
27159 bool all_low_regs
, bind_old_new
;
27162 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
27164 bool use_acquire
= TARGET_HAVE_LDACQ
27165 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
27166 || is_mm_release (model
));
27168 bool use_release
= TARGET_HAVE_LDACQ
27169 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
27170 || is_mm_acquire (model
));
27172 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
27173 a full barrier is emitted after the store-release. */
27175 use_acquire
= false;
27177 /* Checks whether a barrier is needed and emits one accordingly. */
27178 if (!(use_acquire
|| use_release
))
27179 arm_pre_atomic_barrier (model
);
27181 label
= gen_label_rtx ();
27182 emit_label (label
);
27185 new_out
= gen_lowpart (wmode
, new_out
);
27187 old_out
= gen_lowpart (wmode
, old_out
);
27190 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
27192 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
27194 /* Does the operation require destination and first operand to use the same
27195 register? This is decided by register constraints of relevant insn
27196 patterns in thumb1.md. */
27197 gcc_assert (!new_out
|| REG_P (new_out
));
27198 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
27199 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
27200 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
27205 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
27207 /* We want to return the old value while putting the result of the operation
27208 in the same register as the old value so copy the old value over to the
27209 destination register and use that register for the operation. */
27210 if (old_out
&& bind_old_new
)
27212 emit_move_insn (new_out
, old_out
);
27223 x
= gen_rtx_AND (wmode
, old_out
, value
);
27224 emit_insn (gen_rtx_SET (new_out
, x
));
27225 x
= gen_rtx_NOT (wmode
, new_out
);
27226 emit_insn (gen_rtx_SET (new_out
, x
));
27230 if (CONST_INT_P (value
))
27232 value
= GEN_INT (-INTVAL (value
));
27238 if (mode
== DImode
)
27240 /* DImode plus/minus need to clobber flags. */
27241 /* The adddi3 and subdi3 patterns are incorrectly written so that
27242 they require matching operands, even when we could easily support
27243 three operands. Thankfully, this can be fixed up post-splitting,
27244 as the individual add+adc patterns do accept three operands and
27245 post-reload cprop can make these moves go away. */
27246 emit_move_insn (new_out
, old_out
);
27248 x
= gen_adddi3 (new_out
, new_out
, value
);
27250 x
= gen_subdi3 (new_out
, new_out
, value
);
27257 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
27258 emit_insn (gen_rtx_SET (new_out
, x
));
27262 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
27265 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27266 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
27268 /* Checks whether a barrier is needed and emits one accordingly. */
27270 || !(use_acquire
|| use_release
))
27271 arm_post_atomic_barrier (model
);
27274 #define MAX_VECT_LEN 16
27276 struct expand_vec_perm_d
27278 rtx target
, op0
, op1
;
27279 unsigned char perm
[MAX_VECT_LEN
];
27280 machine_mode vmode
;
27281 unsigned char nelt
;
27286 /* Generate a variable permutation. */
27289 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
27291 machine_mode vmode
= GET_MODE (target
);
27292 bool one_vector_p
= rtx_equal_p (op0
, op1
);
27294 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
27295 gcc_checking_assert (GET_MODE (op0
) == vmode
);
27296 gcc_checking_assert (GET_MODE (op1
) == vmode
);
27297 gcc_checking_assert (GET_MODE (sel
) == vmode
);
27298 gcc_checking_assert (TARGET_NEON
);
27302 if (vmode
== V8QImode
)
27303 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
27305 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
27311 if (vmode
== V8QImode
)
27313 pair
= gen_reg_rtx (V16QImode
);
27314 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
27315 pair
= gen_lowpart (TImode
, pair
);
27316 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
27320 pair
= gen_reg_rtx (OImode
);
27321 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
27322 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
27328 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
27330 machine_mode vmode
= GET_MODE (target
);
27331 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
27332 bool one_vector_p
= rtx_equal_p (op0
, op1
);
27333 rtx rmask
[MAX_VECT_LEN
], mask
;
27335 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27336 numbering of elements for big-endian, we must reverse the order. */
27337 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
27339 /* The VTBL instruction does not use a modulo index, so we must take care
27340 of that ourselves. */
27341 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27342 for (i
= 0; i
< nelt
; ++i
)
27344 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
27345 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
27347 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
27350 /* Map lane ordering between architectural lane order, and GCC lane order,
27351 taking into account ABI. See comment above output_move_neon for details. */
27354 neon_endian_lane_map (machine_mode mode
, int lane
)
27356 if (BYTES_BIG_ENDIAN
)
27358 int nelems
= GET_MODE_NUNITS (mode
);
27359 /* Reverse lane order. */
27360 lane
= (nelems
- 1 - lane
);
27361 /* Reverse D register order, to match ABI. */
27362 if (GET_MODE_SIZE (mode
) == 16)
27363 lane
= lane
^ (nelems
/ 2);
27368 /* Some permutations index into pairs of vectors, this is a helper function
27369 to map indexes into those pairs of vectors. */
27372 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
27374 int nelem
= GET_MODE_NUNITS (mode
);
27375 if (BYTES_BIG_ENDIAN
)
27377 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
27381 /* Generate or test for an insn that supports a constant permutation. */
27383 /* Recognize patterns for the VUZP insns. */
27386 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
27388 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
27389 rtx out0
, out1
, in0
, in1
;
27390 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
27394 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
27397 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
27398 big endian pattern on 64 bit vectors, so we correct for that. */
27399 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
27400 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
27402 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
27404 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
27406 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
27410 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27412 for (i
= 0; i
< nelt
; i
++)
27415 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
27416 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
27426 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
27427 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
27428 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
27429 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
27430 case V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
27431 case V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
27432 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
27433 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
27434 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
27435 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
27437 gcc_unreachable ();
27442 if (swap_nelt
!= 0)
27443 std::swap (in0
, in1
);
27446 out1
= gen_reg_rtx (d
->vmode
);
27448 std::swap (out0
, out1
);
27450 emit_insn (gen (out0
, in0
, in1
, out1
));
27454 /* Recognize patterns for the VZIP insns. */
27457 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
27459 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
27460 rtx out0
, out1
, in0
, in1
;
27461 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
27465 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
27468 is_swapped
= BYTES_BIG_ENDIAN
;
27470 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
27473 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
27475 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
27479 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27481 for (i
= 0; i
< nelt
/ 2; i
++)
27484 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
27485 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
27489 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
27490 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
27501 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
27502 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
27503 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
27504 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
27505 case V8HFmode
: gen
= gen_neon_vzipv8hf_internal
; break;
27506 case V4HFmode
: gen
= gen_neon_vzipv4hf_internal
; break;
27507 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
27508 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
27509 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
27510 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
27512 gcc_unreachable ();
27518 std::swap (in0
, in1
);
27521 out1
= gen_reg_rtx (d
->vmode
);
27523 std::swap (out0
, out1
);
27525 emit_insn (gen (out0
, in0
, in1
, out1
));
27529 /* Recognize patterns for the VREV insns. */
27532 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
27534 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
27535 rtx (*gen
)(rtx
, rtx
);
27537 if (!d
->one_vector_p
)
27546 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
27547 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
27555 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
27556 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
27557 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
27558 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
27559 case V8HFmode
: gen
= gen_neon_vrev64v8hf
; break;
27560 case V4HFmode
: gen
= gen_neon_vrev64v4hf
; break;
27568 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
27569 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
27570 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
27571 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
27572 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
27573 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
27574 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
27575 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
27584 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
27585 for (j
= 0; j
<= diff
; j
+= 1)
27587 /* This is guaranteed to be true as the value of diff
27588 is 7, 3, 1 and we should have enough elements in the
27589 queue to generate this. Getting a vector mask with a
27590 value of diff other than these values implies that
27591 something is wrong by the time we get here. */
27592 gcc_assert (i
+ j
< nelt
);
27593 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
27601 emit_insn (gen (d
->target
, d
->op0
));
27605 /* Recognize patterns for the VTRN insns. */
27608 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
27610 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
27611 rtx out0
, out1
, in0
, in1
;
27612 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
27614 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
27617 /* Note that these are little-endian tests. Adjust for big-endian later. */
27618 if (d
->perm
[0] == 0)
27620 else if (d
->perm
[0] == 1)
27624 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27626 for (i
= 0; i
< nelt
; i
+= 2)
27628 if (d
->perm
[i
] != i
+ odd
)
27630 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
27640 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
27641 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
27642 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
27643 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
27644 case V8HFmode
: gen
= gen_neon_vtrnv8hf_internal
; break;
27645 case V4HFmode
: gen
= gen_neon_vtrnv4hf_internal
; break;
27646 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
27647 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
27648 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
27649 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
27651 gcc_unreachable ();
27656 if (BYTES_BIG_ENDIAN
)
27658 std::swap (in0
, in1
);
27663 out1
= gen_reg_rtx (d
->vmode
);
27665 std::swap (out0
, out1
);
27667 emit_insn (gen (out0
, in0
, in1
, out1
));
27671 /* Recognize patterns for the VEXT insns. */
27674 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
27676 unsigned int i
, nelt
= d
->nelt
;
27677 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
27680 unsigned int location
;
27682 unsigned int next
= d
->perm
[0] + 1;
27684 /* TODO: Handle GCC's numbering of elements for big-endian. */
27685 if (BYTES_BIG_ENDIAN
)
27688 /* Check if the extracted indexes are increasing by one. */
27689 for (i
= 1; i
< nelt
; next
++, i
++)
27691 /* If we hit the most significant element of the 2nd vector in
27692 the previous iteration, no need to test further. */
27693 if (next
== 2 * nelt
)
27696 /* If we are operating on only one vector: it could be a
27697 rotation. If there are only two elements of size < 64, let
27698 arm_evpc_neon_vrev catch it. */
27699 if (d
->one_vector_p
&& (next
== nelt
))
27701 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
27707 if (d
->perm
[i
] != next
)
27711 location
= d
->perm
[0];
27715 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
27716 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
27717 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
27718 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
27719 case V2SImode
: gen
= gen_neon_vextv2si
; break;
27720 case V4SImode
: gen
= gen_neon_vextv4si
; break;
27721 case V4HFmode
: gen
= gen_neon_vextv4hf
; break;
27722 case V8HFmode
: gen
= gen_neon_vextv8hf
; break;
27723 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
27724 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
27725 case V2DImode
: gen
= gen_neon_vextv2di
; break;
27734 offset
= GEN_INT (location
);
27735 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
27739 /* The NEON VTBL instruction is a fully variable permuation that's even
27740 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
27741 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
27742 can do slightly better by expanding this as a constant where we don't
27743 have to apply a mask. */
27746 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
27748 rtx rperm
[MAX_VECT_LEN
], sel
;
27749 machine_mode vmode
= d
->vmode
;
27750 unsigned int i
, nelt
= d
->nelt
;
27752 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27753 numbering of elements for big-endian, we must reverse the order. */
27754 if (BYTES_BIG_ENDIAN
)
27760 /* Generic code will try constant permutation twice. Once with the
27761 original mode and again with the elements lowered to QImode.
27762 So wait and don't do the selector expansion ourselves. */
27763 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
27766 for (i
= 0; i
< nelt
; ++i
)
27767 rperm
[i
] = GEN_INT (d
->perm
[i
]);
27768 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
27769 sel
= force_reg (vmode
, sel
);
27771 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
27776 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
27778 /* Check if the input mask matches vext before reordering the
27781 if (arm_evpc_neon_vext (d
))
27784 /* The pattern matching functions above are written to look for a small
27785 number to begin the sequence (0, 1, N/2). If we begin with an index
27786 from the second operand, we can swap the operands. */
27787 if (d
->perm
[0] >= d
->nelt
)
27789 unsigned i
, nelt
= d
->nelt
;
27791 for (i
= 0; i
< nelt
; ++i
)
27792 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
27794 std::swap (d
->op0
, d
->op1
);
27799 if (arm_evpc_neon_vuzp (d
))
27801 if (arm_evpc_neon_vzip (d
))
27803 if (arm_evpc_neon_vrev (d
))
27805 if (arm_evpc_neon_vtrn (d
))
27807 return arm_evpc_neon_vtbl (d
);
27812 /* Expand a vec_perm_const pattern. */
27815 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
27817 struct expand_vec_perm_d d
;
27818 int i
, nelt
, which
;
27824 d
.vmode
= GET_MODE (target
);
27825 gcc_assert (VECTOR_MODE_P (d
.vmode
));
27826 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
27827 d
.testing_p
= false;
27829 for (i
= which
= 0; i
< nelt
; ++i
)
27831 rtx e
= XVECEXP (sel
, 0, i
);
27832 int ei
= INTVAL (e
) & (2 * nelt
- 1);
27833 which
|= (ei
< nelt
? 1 : 2);
27843 d
.one_vector_p
= false;
27844 if (!rtx_equal_p (op0
, op1
))
27847 /* The elements of PERM do not suggest that only the first operand
27848 is used, but both operands are identical. Allow easier matching
27849 of the permutation by folding the permutation into the single
27853 for (i
= 0; i
< nelt
; ++i
)
27854 d
.perm
[i
] &= nelt
- 1;
27856 d
.one_vector_p
= true;
27861 d
.one_vector_p
= true;
27865 return arm_expand_vec_perm_const_1 (&d
);
27868 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
27871 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
27872 const unsigned char *sel
)
27874 struct expand_vec_perm_d d
;
27875 unsigned int i
, nelt
, which
;
27879 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
27880 d
.testing_p
= true;
27881 memcpy (d
.perm
, sel
, nelt
);
27883 /* Categorize the set of elements in the selector. */
27884 for (i
= which
= 0; i
< nelt
; ++i
)
27886 unsigned char e
= d
.perm
[i
];
27887 gcc_assert (e
< 2 * nelt
);
27888 which
|= (e
< nelt
? 1 : 2);
27891 /* For all elements from second vector, fold the elements to first. */
27893 for (i
= 0; i
< nelt
; ++i
)
27896 /* Check whether the mask can be applied to the vector type. */
27897 d
.one_vector_p
= (which
!= 3);
27899 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
27900 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
27901 if (!d
.one_vector_p
)
27902 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
27905 ret
= arm_expand_vec_perm_const_1 (&d
);
27912 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
27914 /* If we are soft float and we do not have ldrd
27915 then all auto increment forms are ok. */
27916 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
27921 /* Post increment and Pre Decrement are supported for all
27922 instruction forms except for vector forms. */
27925 if (VECTOR_MODE_P (mode
))
27927 if (code
!= ARM_PRE_DEC
)
27937 /* Without LDRD and mode size greater than
27938 word size, there is no point in auto-incrementing
27939 because ldm and stm will not have these forms. */
27940 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
27943 /* Vector and floating point modes do not support
27944 these auto increment forms. */
27945 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
27958 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
27959 on ARM, since we know that shifts by negative amounts are no-ops.
27960 Additionally, the default expansion code is not available or suitable
27961 for post-reload insn splits (this can occur when the register allocator
27962 chooses not to do a shift in NEON).
27964 This function is used in both initial expand and post-reload splits, and
27965 handles all kinds of 64-bit shifts.
27967 Input requirements:
27968 - It is safe for the input and output to be the same register, but
27969 early-clobber rules apply for the shift amount and scratch registers.
27970 - Shift by register requires both scratch registers. In all other cases
27971 the scratch registers may be NULL.
27972 - Ashiftrt by a register also clobbers the CC register. */
27974 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
27975 rtx amount
, rtx scratch1
, rtx scratch2
)
27977 rtx out_high
= gen_highpart (SImode
, out
);
27978 rtx out_low
= gen_lowpart (SImode
, out
);
27979 rtx in_high
= gen_highpart (SImode
, in
);
27980 rtx in_low
= gen_lowpart (SImode
, in
);
27983 in = the register pair containing the input value.
27984 out = the destination register pair.
27985 up = the high- or low-part of each pair.
27986 down = the opposite part to "up".
27987 In a shift, we can consider bits to shift from "up"-stream to
27988 "down"-stream, so in a left-shift "up" is the low-part and "down"
27989 is the high-part of each register pair. */
27991 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
27992 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
27993 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
27994 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
27996 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
27998 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
27999 && GET_MODE (out
) == DImode
);
28001 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
28002 && GET_MODE (in
) == DImode
);
28004 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
28005 && GET_MODE (amount
) == SImode
)
28006 || CONST_INT_P (amount
)));
28007 gcc_assert (scratch1
== NULL
28008 || (GET_CODE (scratch1
) == SCRATCH
)
28009 || (GET_MODE (scratch1
) == SImode
28010 && REG_P (scratch1
)));
28011 gcc_assert (scratch2
== NULL
28012 || (GET_CODE (scratch2
) == SCRATCH
)
28013 || (GET_MODE (scratch2
) == SImode
28014 && REG_P (scratch2
)));
28015 gcc_assert (!REG_P (out
) || !REG_P (amount
)
28016 || !HARD_REGISTER_P (out
)
28017 || (REGNO (out
) != REGNO (amount
)
28018 && REGNO (out
) + 1 != REGNO (amount
)));
28020 /* Macros to make following code more readable. */
28021 #define SUB_32(DEST,SRC) \
28022 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28023 #define RSB_32(DEST,SRC) \
28024 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28025 #define SUB_S_32(DEST,SRC) \
28026 gen_addsi3_compare0 ((DEST), (SRC), \
28028 #define SET(DEST,SRC) \
28029 gen_rtx_SET ((DEST), (SRC))
28030 #define SHIFT(CODE,SRC,AMOUNT) \
28031 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28032 #define LSHIFT(CODE,SRC,AMOUNT) \
28033 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28034 SImode, (SRC), (AMOUNT))
28035 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28036 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28037 SImode, (SRC), (AMOUNT))
28039 gen_rtx_IOR (SImode, (A), (B))
28040 #define BRANCH(COND,LABEL) \
28041 gen_arm_cond_branch ((LABEL), \
28042 gen_rtx_ ## COND (CCmode, cc_reg, \
28046 /* Shifts by register and shifts by constant are handled separately. */
28047 if (CONST_INT_P (amount
))
28049 /* We have a shift-by-constant. */
28051 /* First, handle out-of-range shift amounts.
28052 In both cases we try to match the result an ARM instruction in a
28053 shift-by-register would give. This helps reduce execution
28054 differences between optimization levels, but it won't stop other
28055 parts of the compiler doing different things. This is "undefined
28056 behavior, in any case. */
28057 if (INTVAL (amount
) <= 0)
28058 emit_insn (gen_movdi (out
, in
));
28059 else if (INTVAL (amount
) >= 64)
28061 if (code
== ASHIFTRT
)
28063 rtx const31_rtx
= GEN_INT (31);
28064 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
28065 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
28068 emit_insn (gen_movdi (out
, const0_rtx
));
28071 /* Now handle valid shifts. */
28072 else if (INTVAL (amount
) < 32)
28074 /* Shifts by a constant less than 32. */
28075 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
28077 /* Clearing the out register in DImode first avoids lots
28078 of spilling and results in less stack usage.
28079 Later this redundant insn is completely removed.
28080 Do that only if "in" and "out" are different registers. */
28081 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
28082 emit_insn (SET (out
, const0_rtx
));
28083 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28084 emit_insn (SET (out_down
,
28085 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
28087 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
28091 /* Shifts by a constant greater than 31. */
28092 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
28094 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
28095 emit_insn (SET (out
, const0_rtx
));
28096 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
28097 if (code
== ASHIFTRT
)
28098 emit_insn (gen_ashrsi3 (out_up
, in_up
,
28101 emit_insn (SET (out_up
, const0_rtx
));
28106 /* We have a shift-by-register. */
28107 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
28109 /* This alternative requires the scratch registers. */
28110 gcc_assert (scratch1
&& REG_P (scratch1
));
28111 gcc_assert (scratch2
&& REG_P (scratch2
));
28113 /* We will need the values "amount-32" and "32-amount" later.
28114 Swapping them around now allows the later code to be more general. */
28118 emit_insn (SUB_32 (scratch1
, amount
));
28119 emit_insn (RSB_32 (scratch2
, amount
));
28122 emit_insn (RSB_32 (scratch1
, amount
));
28123 /* Also set CC = amount > 32. */
28124 emit_insn (SUB_S_32 (scratch2
, amount
));
28127 emit_insn (RSB_32 (scratch1
, amount
));
28128 emit_insn (SUB_32 (scratch2
, amount
));
28131 gcc_unreachable ();
28134 /* Emit code like this:
28137 out_down = in_down << amount;
28138 out_down = (in_up << (amount - 32)) | out_down;
28139 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28140 out_up = in_up << amount;
28143 out_down = in_down >> amount;
28144 out_down = (in_up << (32 - amount)) | out_down;
28146 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28147 out_up = in_up << amount;
28150 out_down = in_down >> amount;
28151 out_down = (in_up << (32 - amount)) | out_down;
28153 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28154 out_up = in_up << amount;
28156 The ARM and Thumb2 variants are the same but implemented slightly
28157 differently. If this were only called during expand we could just
28158 use the Thumb2 case and let combine do the right thing, but this
28159 can also be called from post-reload splitters. */
28161 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28163 if (!TARGET_THUMB2
)
28165 /* Emit code for ARM mode. */
28166 emit_insn (SET (out_down
,
28167 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
28168 if (code
== ASHIFTRT
)
28170 rtx_code_label
*done_label
= gen_label_rtx ();
28171 emit_jump_insn (BRANCH (LT
, done_label
));
28172 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
28174 emit_label (done_label
);
28177 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
28182 /* Emit code for Thumb2 mode.
28183 Thumb2 can't do shift and or in one insn. */
28184 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
28185 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
28187 if (code
== ASHIFTRT
)
28189 rtx_code_label
*done_label
= gen_label_rtx ();
28190 emit_jump_insn (BRANCH (LT
, done_label
));
28191 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
28192 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
28193 emit_label (done_label
);
28197 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
28198 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
28202 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
28216 /* Returns true if the pattern is a valid symbolic address, which is either a
28217 symbol_ref or (symbol_ref + addend).
28219 According to the ARM ELF ABI, the initial addend of REL-type relocations
28220 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
28221 literal field of the instruction as a 16-bit signed value in the range
28222 -32768 <= A < 32768. */
28225 arm_valid_symbolic_address_p (rtx addr
)
28227 rtx xop0
, xop1
= NULL_RTX
;
28230 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
28233 /* (const (plus: symbol_ref const_int)) */
28234 if (GET_CODE (addr
) == CONST
)
28235 tmp
= XEXP (addr
, 0);
28237 if (GET_CODE (tmp
) == PLUS
)
28239 xop0
= XEXP (tmp
, 0);
28240 xop1
= XEXP (tmp
, 1);
28242 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
28243 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
28249 /* Returns true if a valid comparison operation and makes
28250 the operands in a form that is valid. */
28252 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
28254 enum rtx_code code
= GET_CODE (*comparison
);
28256 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
28257 ? GET_MODE (*op2
) : GET_MODE (*op1
);
28259 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
28261 if (code
== UNEQ
|| code
== LTGT
)
28264 code_int
= (int)code
;
28265 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
28266 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
28271 if (!arm_add_operand (*op1
, mode
))
28272 *op1
= force_reg (mode
, *op1
);
28273 if (!arm_add_operand (*op2
, mode
))
28274 *op2
= force_reg (mode
, *op2
);
28278 if (!cmpdi_operand (*op1
, mode
))
28279 *op1
= force_reg (mode
, *op1
);
28280 if (!cmpdi_operand (*op2
, mode
))
28281 *op2
= force_reg (mode
, *op2
);
28285 if (!TARGET_VFP_FP16INST
)
28287 /* FP16 comparisons are done in SF mode. */
28289 *op1
= convert_to_mode (mode
, *op1
, 1);
28290 *op2
= convert_to_mode (mode
, *op2
, 1);
28291 /* Fall through. */
28294 if (!vfp_compare_operand (*op1
, mode
))
28295 *op1
= force_reg (mode
, *op1
);
28296 if (!vfp_compare_operand (*op2
, mode
))
28297 *op2
= force_reg (mode
, *op2
);
28307 /* Maximum number of instructions to set block of memory. */
28309 arm_block_set_max_insns (void)
28311 if (optimize_function_for_size_p (cfun
))
28314 return current_tune
->max_insns_inline_memset
;
28317 /* Return TRUE if it's profitable to set block of memory for
28318 non-vectorized case. VAL is the value to set the memory
28319 with. LENGTH is the number of bytes to set. ALIGN is the
28320 alignment of the destination memory in bytes. UNALIGNED_P
28321 is TRUE if we can only set the memory with instructions
28322 meeting alignment requirements. USE_STRD_P is TRUE if we
28323 can use strd to set the memory. */
28325 arm_block_set_non_vect_profit_p (rtx val
,
28326 unsigned HOST_WIDE_INT length
,
28327 unsigned HOST_WIDE_INT align
,
28328 bool unaligned_p
, bool use_strd_p
)
28331 /* For leftovers in bytes of 0-7, we can set the memory block using
28332 strb/strh/str with minimum instruction number. */
28333 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28337 num
= arm_const_inline_cost (SET
, val
);
28338 num
+= length
/ align
+ length
% align
;
28340 else if (use_strd_p
)
28342 num
= arm_const_double_inline_cost (val
);
28343 num
+= (length
>> 3) + leftover
[length
& 7];
28347 num
= arm_const_inline_cost (SET
, val
);
28348 num
+= (length
>> 2) + leftover
[length
& 3];
28351 /* We may be able to combine last pair STRH/STRB into a single STR
28352 by shifting one byte back. */
28353 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
28356 return (num
<= arm_block_set_max_insns ());
28359 /* Return TRUE if it's profitable to set block of memory for
28360 vectorized case. LENGTH is the number of bytes to set.
28361 ALIGN is the alignment of destination memory in bytes.
28362 MODE is the vector mode used to set the memory. */
28364 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
28365 unsigned HOST_WIDE_INT align
,
28369 bool unaligned_p
= ((align
& 3) != 0);
28370 unsigned int nelt
= GET_MODE_NUNITS (mode
);
28372 /* Instruction loading constant value. */
28374 /* Instructions storing the memory. */
28375 num
+= (length
+ nelt
- 1) / nelt
;
28376 /* Instructions adjusting the address expression. Only need to
28377 adjust address expression if it's 4 bytes aligned and bytes
28378 leftover can only be stored by mis-aligned store instruction. */
28379 if (!unaligned_p
&& (length
& 3) != 0)
28382 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28383 if (!unaligned_p
&& mode
== V16QImode
)
28386 return (num
<= arm_block_set_max_insns ());
28389 /* Set a block of memory using vectorization instructions for the
28390 unaligned case. We fill the first LENGTH bytes of the memory
28391 area starting from DSTBASE with byte constant VALUE. ALIGN is
28392 the alignment requirement of memory. Return TRUE if succeeded. */
28394 arm_block_set_unaligned_vect (rtx dstbase
,
28395 unsigned HOST_WIDE_INT length
,
28396 unsigned HOST_WIDE_INT value
,
28397 unsigned HOST_WIDE_INT align
)
28399 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
28401 rtx val_elt
, val_vec
, reg
;
28402 rtx rval
[MAX_VECT_LEN
];
28403 rtx (*gen_func
) (rtx
, rtx
);
28405 unsigned HOST_WIDE_INT v
= value
;
28406 unsigned int offset
= 0;
28407 gcc_assert ((align
& 0x3) != 0);
28408 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
28409 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
28410 if (length
>= nelt_v16
)
28413 gen_func
= gen_movmisalignv16qi
;
28418 gen_func
= gen_movmisalignv8qi
;
28420 nelt_mode
= GET_MODE_NUNITS (mode
);
28421 gcc_assert (length
>= nelt_mode
);
28422 /* Skip if it isn't profitable. */
28423 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
28426 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
28427 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28429 v
= sext_hwi (v
, BITS_PER_WORD
);
28430 val_elt
= GEN_INT (v
);
28431 for (j
= 0; j
< nelt_mode
; j
++)
28434 reg
= gen_reg_rtx (mode
);
28435 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
28436 /* Emit instruction loading the constant value. */
28437 emit_move_insn (reg
, val_vec
);
28439 /* Handle nelt_mode bytes in a vector. */
28440 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
28442 emit_insn ((*gen_func
) (mem
, reg
));
28443 if (i
+ 2 * nelt_mode
<= length
)
28445 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
28446 offset
+= nelt_mode
;
28447 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28451 /* If there are not less than nelt_v8 bytes leftover, we must be in
28453 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
28455 /* Handle (8, 16) bytes leftover. */
28456 if (i
+ nelt_v8
< length
)
28458 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
28459 offset
+= length
- i
;
28460 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28462 /* We are shifting bytes back, set the alignment accordingly. */
28463 if ((length
& 1) != 0 && align
>= 2)
28464 set_mem_align (mem
, BITS_PER_UNIT
);
28466 emit_insn (gen_movmisalignv16qi (mem
, reg
));
28468 /* Handle (0, 8] bytes leftover. */
28469 else if (i
< length
&& i
+ nelt_v8
>= length
)
28471 if (mode
== V16QImode
)
28472 reg
= gen_lowpart (V8QImode
, reg
);
28474 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
28475 + (nelt_mode
- nelt_v8
))));
28476 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
28477 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
28479 /* We are shifting bytes back, set the alignment accordingly. */
28480 if ((length
& 1) != 0 && align
>= 2)
28481 set_mem_align (mem
, BITS_PER_UNIT
);
28483 emit_insn (gen_movmisalignv8qi (mem
, reg
));
28489 /* Set a block of memory using vectorization instructions for the
28490 aligned case. We fill the first LENGTH bytes of the memory area
28491 starting from DSTBASE with byte constant VALUE. ALIGN is the
28492 alignment requirement of memory. Return TRUE if succeeded. */
28494 arm_block_set_aligned_vect (rtx dstbase
,
28495 unsigned HOST_WIDE_INT length
,
28496 unsigned HOST_WIDE_INT value
,
28497 unsigned HOST_WIDE_INT align
)
28499 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
28500 rtx dst
, addr
, mem
;
28501 rtx val_elt
, val_vec
, reg
;
28502 rtx rval
[MAX_VECT_LEN
];
28504 unsigned HOST_WIDE_INT v
= value
;
28505 unsigned int offset
= 0;
28507 gcc_assert ((align
& 0x3) == 0);
28508 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
28509 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
28510 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
28515 nelt_mode
= GET_MODE_NUNITS (mode
);
28516 gcc_assert (length
>= nelt_mode
);
28517 /* Skip if it isn't profitable. */
28518 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
28521 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
28523 v
= sext_hwi (v
, BITS_PER_WORD
);
28524 val_elt
= GEN_INT (v
);
28525 for (j
= 0; j
< nelt_mode
; j
++)
28528 reg
= gen_reg_rtx (mode
);
28529 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
28530 /* Emit instruction loading the constant value. */
28531 emit_move_insn (reg
, val_vec
);
28534 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28535 if (mode
== V16QImode
)
28537 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28538 emit_insn (gen_movmisalignv16qi (mem
, reg
));
28540 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28541 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
28543 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
28544 offset
+= length
- nelt_mode
;
28545 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28546 /* We are shifting bytes back, set the alignment accordingly. */
28547 if ((length
& 0x3) == 0)
28548 set_mem_align (mem
, BITS_PER_UNIT
* 4);
28549 else if ((length
& 0x1) == 0)
28550 set_mem_align (mem
, BITS_PER_UNIT
* 2);
28552 set_mem_align (mem
, BITS_PER_UNIT
);
28554 emit_insn (gen_movmisalignv16qi (mem
, reg
));
28557 /* Fall through for bytes leftover. */
28559 nelt_mode
= GET_MODE_NUNITS (mode
);
28560 reg
= gen_lowpart (V8QImode
, reg
);
28563 /* Handle 8 bytes in a vector. */
28564 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
28566 addr
= plus_constant (Pmode
, dst
, i
);
28567 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
28568 emit_move_insn (mem
, reg
);
28571 /* Handle single word leftover by shifting 4 bytes back. We can
28572 use aligned access for this case. */
28573 if (i
+ UNITS_PER_WORD
== length
)
28575 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
28576 offset
+= i
- UNITS_PER_WORD
;
28577 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
28578 /* We are shifting 4 bytes back, set the alignment accordingly. */
28579 if (align
> UNITS_PER_WORD
)
28580 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
28582 emit_move_insn (mem
, reg
);
28584 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28585 We have to use unaligned access for this case. */
28586 else if (i
< length
)
28588 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
28589 offset
+= length
- nelt_mode
;
28590 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28591 /* We are shifting bytes back, set the alignment accordingly. */
28592 if ((length
& 1) == 0)
28593 set_mem_align (mem
, BITS_PER_UNIT
* 2);
28595 set_mem_align (mem
, BITS_PER_UNIT
);
28597 emit_insn (gen_movmisalignv8qi (mem
, reg
));
28603 /* Set a block of memory using plain strh/strb instructions, only
28604 using instructions allowed by ALIGN on processor. We fill the
28605 first LENGTH bytes of the memory area starting from DSTBASE
28606 with byte constant VALUE. ALIGN is the alignment requirement
28609 arm_block_set_unaligned_non_vect (rtx dstbase
,
28610 unsigned HOST_WIDE_INT length
,
28611 unsigned HOST_WIDE_INT value
,
28612 unsigned HOST_WIDE_INT align
)
28615 rtx dst
, addr
, mem
;
28616 rtx val_exp
, val_reg
, reg
;
28618 HOST_WIDE_INT v
= value
;
28620 gcc_assert (align
== 1 || align
== 2);
28623 v
|= (value
<< BITS_PER_UNIT
);
28625 v
= sext_hwi (v
, BITS_PER_WORD
);
28626 val_exp
= GEN_INT (v
);
28627 /* Skip if it isn't profitable. */
28628 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
28629 align
, true, false))
28632 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
28633 mode
= (align
== 2 ? HImode
: QImode
);
28634 val_reg
= force_reg (SImode
, val_exp
);
28635 reg
= gen_lowpart (mode
, val_reg
);
28637 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
28639 addr
= plus_constant (Pmode
, dst
, i
);
28640 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
28641 emit_move_insn (mem
, reg
);
28644 /* Handle single byte leftover. */
28645 if (i
+ 1 == length
)
28647 reg
= gen_lowpart (QImode
, val_reg
);
28648 addr
= plus_constant (Pmode
, dst
, i
);
28649 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
28650 emit_move_insn (mem
, reg
);
28654 gcc_assert (i
== length
);
28658 /* Set a block of memory using plain strd/str/strh/strb instructions,
28659 to permit unaligned copies on processors which support unaligned
28660 semantics for those instructions. We fill the first LENGTH bytes
28661 of the memory area starting from DSTBASE with byte constant VALUE.
28662 ALIGN is the alignment requirement of memory. */
28664 arm_block_set_aligned_non_vect (rtx dstbase
,
28665 unsigned HOST_WIDE_INT length
,
28666 unsigned HOST_WIDE_INT value
,
28667 unsigned HOST_WIDE_INT align
)
28670 rtx dst
, addr
, mem
;
28671 rtx val_exp
, val_reg
, reg
;
28672 unsigned HOST_WIDE_INT v
;
28675 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
28676 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
28678 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
28679 if (length
< UNITS_PER_WORD
)
28680 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
28683 v
|= (v
<< BITS_PER_WORD
);
28685 v
= sext_hwi (v
, BITS_PER_WORD
);
28687 val_exp
= GEN_INT (v
);
28688 /* Skip if it isn't profitable. */
28689 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
28690 align
, false, use_strd_p
))
28695 /* Try without strd. */
28696 v
= (v
>> BITS_PER_WORD
);
28697 v
= sext_hwi (v
, BITS_PER_WORD
);
28698 val_exp
= GEN_INT (v
);
28699 use_strd_p
= false;
28700 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
28701 align
, false, use_strd_p
))
28706 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
28707 /* Handle double words using strd if possible. */
28710 val_reg
= force_reg (DImode
, val_exp
);
28712 for (; (i
+ 8 <= length
); i
+= 8)
28714 addr
= plus_constant (Pmode
, dst
, i
);
28715 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
28716 emit_move_insn (mem
, reg
);
28720 val_reg
= force_reg (SImode
, val_exp
);
28722 /* Handle words. */
28723 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
28724 for (; (i
+ 4 <= length
); i
+= 4)
28726 addr
= plus_constant (Pmode
, dst
, i
);
28727 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
28728 if ((align
& 3) == 0)
28729 emit_move_insn (mem
, reg
);
28731 emit_insn (gen_unaligned_storesi (mem
, reg
));
28734 /* Merge last pair of STRH and STRB into a STR if possible. */
28735 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
28737 addr
= plus_constant (Pmode
, dst
, i
- 1);
28738 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
28739 /* We are shifting one byte back, set the alignment accordingly. */
28740 if ((align
& 1) == 0)
28741 set_mem_align (mem
, BITS_PER_UNIT
);
28743 /* Most likely this is an unaligned access, and we can't tell at
28744 compilation time. */
28745 emit_insn (gen_unaligned_storesi (mem
, reg
));
28749 /* Handle half word leftover. */
28750 if (i
+ 2 <= length
)
28752 reg
= gen_lowpart (HImode
, val_reg
);
28753 addr
= plus_constant (Pmode
, dst
, i
);
28754 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
28755 if ((align
& 1) == 0)
28756 emit_move_insn (mem
, reg
);
28758 emit_insn (gen_unaligned_storehi (mem
, reg
));
28763 /* Handle single byte leftover. */
28764 if (i
+ 1 == length
)
28766 reg
= gen_lowpart (QImode
, val_reg
);
28767 addr
= plus_constant (Pmode
, dst
, i
);
28768 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
28769 emit_move_insn (mem
, reg
);
28775 /* Set a block of memory using vectorization instructions for both
28776 aligned and unaligned cases. We fill the first LENGTH bytes of
28777 the memory area starting from DSTBASE with byte constant VALUE.
28778 ALIGN is the alignment requirement of memory. */
28780 arm_block_set_vect (rtx dstbase
,
28781 unsigned HOST_WIDE_INT length
,
28782 unsigned HOST_WIDE_INT value
,
28783 unsigned HOST_WIDE_INT align
)
28785 /* Check whether we need to use unaligned store instruction. */
28786 if (((align
& 3) != 0 || (length
& 3) != 0)
28787 /* Check whether unaligned store instruction is available. */
28788 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
28791 if ((align
& 3) == 0)
28792 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
28794 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
28797 /* Expand string store operation. Firstly we try to do that by using
28798 vectorization instructions, then try with ARM unaligned access and
28799 double-word store if profitable. OPERANDS[0] is the destination,
28800 OPERANDS[1] is the number of bytes, operands[2] is the value to
28801 initialize the memory, OPERANDS[3] is the known alignment of the
28804 arm_gen_setmem (rtx
*operands
)
28806 rtx dstbase
= operands
[0];
28807 unsigned HOST_WIDE_INT length
;
28808 unsigned HOST_WIDE_INT value
;
28809 unsigned HOST_WIDE_INT align
;
28811 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
28814 length
= UINTVAL (operands
[1]);
28818 value
= (UINTVAL (operands
[2]) & 0xFF);
28819 align
= UINTVAL (operands
[3]);
28820 if (TARGET_NEON
&& length
>= 8
28821 && current_tune
->string_ops_prefer_neon
28822 && arm_block_set_vect (dstbase
, length
, value
, align
))
28825 if (!unaligned_access
&& (align
& 3) != 0)
28826 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
28828 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
28833 arm_macro_fusion_p (void)
28835 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
28838 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
28839 for MOVW / MOVT macro fusion. */
28842 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
28844 /* We are trying to fuse
28845 movw imm / movt imm
28846 instructions as a group that gets scheduled together. */
28848 rtx set_dest
= SET_DEST (curr_set
);
28850 if (GET_MODE (set_dest
) != SImode
)
28853 /* We are trying to match:
28854 prev (movw) == (set (reg r0) (const_int imm16))
28855 curr (movt) == (set (zero_extract (reg r0)
28858 (const_int imm16_1))
28860 prev (movw) == (set (reg r1)
28861 (high (symbol_ref ("SYM"))))
28862 curr (movt) == (set (reg r0)
28864 (symbol_ref ("SYM")))) */
28866 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
28868 if (CONST_INT_P (SET_SRC (curr_set
))
28869 && CONST_INT_P (SET_SRC (prev_set
))
28870 && REG_P (XEXP (set_dest
, 0))
28871 && REG_P (SET_DEST (prev_set
))
28872 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
28876 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
28877 && REG_P (SET_DEST (curr_set
))
28878 && REG_P (SET_DEST (prev_set
))
28879 && GET_CODE (SET_SRC (prev_set
)) == HIGH
28880 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
28887 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
28889 rtx prev_set
= single_set (prev
);
28890 rtx curr_set
= single_set (curr
);
28896 if (any_condjump_p (curr
))
28899 if (!arm_macro_fusion_p ())
28902 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
28903 && aarch_crypto_can_dual_issue (prev
, curr
))
28906 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
28907 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
28913 /* Return true iff the instruction fusion described by OP is enabled. */
28915 arm_fusion_enabled_p (tune_params::fuse_ops op
)
28917 return current_tune
->fusible_ops
& op
;
28920 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
28922 static unsigned HOST_WIDE_INT
28923 arm_asan_shadow_offset (void)
28925 return HOST_WIDE_INT_1U
<< 29;
28929 /* This is a temporary fix for PR60655. Ideally we need
28930 to handle most of these cases in the generic part but
28931 currently we reject minus (..) (sym_ref). We try to
28932 ameliorate the case with minus (sym_ref1) (sym_ref2)
28933 where they are in the same section. */
28936 arm_const_not_ok_for_debug_p (rtx p
)
28938 tree decl_op0
= NULL
;
28939 tree decl_op1
= NULL
;
28941 if (GET_CODE (p
) == MINUS
)
28943 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
28945 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
28947 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
28948 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
28950 if ((VAR_P (decl_op1
)
28951 || TREE_CODE (decl_op1
) == CONST_DECL
)
28952 && (VAR_P (decl_op0
)
28953 || TREE_CODE (decl_op0
) == CONST_DECL
))
28954 return (get_variable_section (decl_op1
, false)
28955 != get_variable_section (decl_op0
, false));
28957 if (TREE_CODE (decl_op1
) == LABEL_DECL
28958 && TREE_CODE (decl_op0
) == LABEL_DECL
)
28959 return (DECL_CONTEXT (decl_op1
)
28960 != DECL_CONTEXT (decl_op0
));
28970 /* return TRUE if x is a reference to a value in a constant pool */
28972 arm_is_constant_pool_ref (rtx x
)
28975 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
28976 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
28979 /* Remember the last target of arm_set_current_function. */
28980 static GTY(()) tree arm_previous_fndecl
;
28982 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
28985 save_restore_target_globals (tree new_tree
)
28987 /* If we have a previous state, use it. */
28988 if (TREE_TARGET_GLOBALS (new_tree
))
28989 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
28990 else if (new_tree
== target_option_default_node
)
28991 restore_target_globals (&default_target_globals
);
28994 /* Call target_reinit and save the state for TARGET_GLOBALS. */
28995 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
28998 arm_option_params_internal ();
29001 /* Invalidate arm_previous_fndecl. */
29004 arm_reset_previous_fndecl (void)
29006 arm_previous_fndecl
= NULL_TREE
;
29009 /* Establish appropriate back-end context for processing the function
29010 FNDECL. The argument might be NULL to indicate processing at top
29011 level, outside of any function scope. */
29014 arm_set_current_function (tree fndecl
)
29016 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
29019 tree old_tree
= (arm_previous_fndecl
29020 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
29023 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
29025 /* If current function has no attributes but previous one did,
29026 use the default node. */
29027 if (! new_tree
&& old_tree
)
29028 new_tree
= target_option_default_node
;
29030 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
29031 the default have been handled by save_restore_target_globals from
29032 arm_pragma_target_parse. */
29033 if (old_tree
== new_tree
)
29036 arm_previous_fndecl
= fndecl
;
29038 /* First set the target options. */
29039 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
29041 save_restore_target_globals (new_tree
);
29044 /* Implement TARGET_OPTION_PRINT. */
29047 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
29049 int flags
= ptr
->x_target_flags
;
29050 const struct arm_fpu_desc
*fpu_desc
= &all_fpus
[ptr
->x_arm_fpu_index
];
29052 fprintf (file
, "%*sselected arch %s\n", indent
, "",
29053 TARGET_THUMB2_P (flags
) ? "thumb2" :
29054 TARGET_THUMB_P (flags
) ? "thumb1" :
29057 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_desc
->name
);
29060 /* Hook to determine if one function can safely inline another. */
29063 arm_can_inline_p (tree caller
, tree callee
)
29065 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
29066 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
29068 struct cl_target_option
*caller_opts
29069 = TREE_TARGET_OPTION (caller_tree
? caller_tree
29070 : target_option_default_node
);
29072 struct cl_target_option
*callee_opts
29073 = TREE_TARGET_OPTION (callee_tree
? callee_tree
29074 : target_option_default_node
);
29076 const struct arm_fpu_desc
*caller_fpu
29077 = &all_fpus
[caller_opts
->x_arm_fpu_index
];
29078 const struct arm_fpu_desc
*callee_fpu
29079 = &all_fpus
[callee_opts
->x_arm_fpu_index
];
29081 /* Callee's fpu features should be a subset of the caller's. */
29082 if ((caller_fpu
->features
& callee_fpu
->features
) != callee_fpu
->features
)
29085 /* Need same FPU regs. */
29086 if (callee_fpu
->regs
!= callee_fpu
->regs
)
29089 /* OK to inline between different modes.
29090 Function with mode specific instructions, e.g using asm,
29091 must be explicitly protected with noinline. */
29095 /* Hook to fix function's alignment affected by target attribute. */
29098 arm_relayout_function (tree fndecl
)
29100 if (DECL_USER_ALIGN (fndecl
))
29103 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
29106 callee_tree
= target_option_default_node
;
29108 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
29109 SET_DECL_ALIGN (fndecl
, FUNCTION_BOUNDARY_P (opts
->x_target_flags
));
29112 /* Inner function to process the attribute((target(...))), take an argument and
29113 set the current options from the argument. If we have a list, recursively
29114 go over the list. */
29117 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
29119 if (TREE_CODE (args
) == TREE_LIST
)
29123 for (; args
; args
= TREE_CHAIN (args
))
29124 if (TREE_VALUE (args
)
29125 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
29130 else if (TREE_CODE (args
) != STRING_CST
)
29132 error ("attribute %<target%> argument not a string");
29136 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
29139 while ((q
= strtok (argstr
, ",")) != NULL
)
29141 while (ISSPACE (*q
)) ++q
;
29144 if (!strncmp (q
, "thumb", 5))
29145 opts
->x_target_flags
|= MASK_THUMB
;
29147 else if (!strncmp (q
, "arm", 3))
29148 opts
->x_target_flags
&= ~MASK_THUMB
;
29150 else if (!strncmp (q
, "fpu=", 4))
29152 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
29153 &opts
->x_arm_fpu_index
, CL_TARGET
))
29155 error ("invalid fpu for attribute(target(\"%s\"))", q
);
29161 error ("attribute(target(\"%s\")) is unknown", q
);
29165 arm_option_check_internal (opts
);
29171 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29174 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
29175 struct gcc_options
*opts_set
)
29177 if (!arm_valid_target_attribute_rec (args
, opts
))
29180 /* Do any overrides, such as global options arch=xxx. */
29181 arm_option_override_internal (opts
, opts_set
);
29183 return build_target_option_node (opts
);
29187 add_attribute (const char * mode
, tree
*attributes
)
29189 size_t len
= strlen (mode
);
29190 tree value
= build_string (len
, mode
);
29192 TREE_TYPE (value
) = build_array_type (char_type_node
,
29193 build_index_type (size_int (len
)));
29195 *attributes
= tree_cons (get_identifier ("target"),
29196 build_tree_list (NULL_TREE
, value
),
29200 /* For testing. Insert thumb or arm modes alternatively on functions. */
29203 arm_insert_attributes (tree fndecl
, tree
* attributes
)
29207 if (! TARGET_FLIP_THUMB
)
29210 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
29211 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
29214 /* Nested definitions must inherit mode. */
29215 if (current_function_decl
)
29217 mode
= TARGET_THUMB
? "thumb" : "arm";
29218 add_attribute (mode
, attributes
);
29222 /* If there is already a setting don't change it. */
29223 if (lookup_attribute ("target", *attributes
) != NULL
)
29226 mode
= thumb_flipper
? "thumb" : "arm";
29227 add_attribute (mode
, attributes
);
29229 thumb_flipper
= !thumb_flipper
;
29232 /* Hook to validate attribute((target("string"))). */
29235 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
29236 tree args
, int ARG_UNUSED (flags
))
29239 struct gcc_options func_options
;
29240 tree cur_tree
, new_optimize
;
29241 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
29243 /* Get the optimization options of the current function. */
29244 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
29246 /* If the function changed the optimization levels as well as setting target
29247 options, start with the optimizations specified. */
29248 if (!func_optimize
)
29249 func_optimize
= optimization_default_node
;
29251 /* Init func_options. */
29252 memset (&func_options
, 0, sizeof (func_options
));
29253 init_options_struct (&func_options
, NULL
);
29254 lang_hooks
.init_options_struct (&func_options
);
29256 /* Initialize func_options to the defaults. */
29257 cl_optimization_restore (&func_options
,
29258 TREE_OPTIMIZATION (func_optimize
));
29260 cl_target_option_restore (&func_options
,
29261 TREE_TARGET_OPTION (target_option_default_node
));
29263 /* Set func_options flags with new target mode. */
29264 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
29265 &global_options_set
);
29267 if (cur_tree
== NULL_TREE
)
29270 new_optimize
= build_optimization_node (&func_options
);
29272 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
29274 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
29276 finalize_options_struct (&func_options
);
29282 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
29285 fprintf (stream
, "\t.syntax unified\n");
29289 if (is_called_in_ARM_mode (decl
)
29290 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
29291 && cfun
->is_thunk
))
29292 fprintf (stream
, "\t.code 32\n");
29293 else if (TARGET_THUMB1
)
29294 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
29296 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
29299 fprintf (stream
, "\t.arm\n");
29301 asm_fprintf (asm_out_file
, "\t.fpu %s\n",
29302 TARGET_SOFT_FLOAT
? "softvfp" : TARGET_FPU_NAME
);
29304 if (TARGET_POKE_FUNCTION_NAME
)
29305 arm_poke_function_name (stream
, (const char *) name
);
29308 /* If MEM is in the form of [base+offset], extract the two parts
29309 of address and set to BASE and OFFSET, otherwise return false
29310 after clearing BASE and OFFSET. */
29313 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
29317 gcc_assert (MEM_P (mem
));
29319 addr
= XEXP (mem
, 0);
29321 /* Strip off const from addresses like (const (addr)). */
29322 if (GET_CODE (addr
) == CONST
)
29323 addr
= XEXP (addr
, 0);
29325 if (GET_CODE (addr
) == REG
)
29328 *offset
= const0_rtx
;
29332 if (GET_CODE (addr
) == PLUS
29333 && GET_CODE (XEXP (addr
, 0)) == REG
29334 && CONST_INT_P (XEXP (addr
, 1)))
29336 *base
= XEXP (addr
, 0);
29337 *offset
= XEXP (addr
, 1);
29342 *offset
= NULL_RTX
;
29347 /* If INSN is a load or store of address in the form of [base+offset],
29348 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
29349 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29350 otherwise return FALSE. */
29353 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
29357 gcc_assert (INSN_P (insn
));
29358 x
= PATTERN (insn
);
29359 if (GET_CODE (x
) != SET
)
29363 dest
= SET_DEST (x
);
29364 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
29367 extract_base_offset_in_addr (dest
, base
, offset
);
29369 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
29372 extract_base_offset_in_addr (src
, base
, offset
);
29377 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
29380 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29382 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
29383 and PRI are only calculated for these instructions. For other instruction,
29384 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
29385 instruction fusion can be supported by returning different priorities.
29387 It's important that irrelevant instructions get the largest FUSION_PRI. */
29390 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
29391 int *fusion_pri
, int *pri
)
29397 gcc_assert (INSN_P (insn
));
29400 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
29407 /* Load goes first. */
29409 *fusion_pri
= tmp
- 1;
29411 *fusion_pri
= tmp
- 2;
29415 /* INSN with smaller base register goes first. */
29416 tmp
-= ((REGNO (base
) & 0xff) << 20);
29418 /* INSN with smaller offset goes first. */
29419 off_val
= (int)(INTVAL (offset
));
29421 tmp
-= (off_val
& 0xfffff);
29423 tmp
+= ((- off_val
) & 0xfffff);
29430 /* Construct and return a PARALLEL RTX vector with elements numbering the
29431 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
29432 the vector - from the perspective of the architecture. This does not
29433 line up with GCC's perspective on lane numbers, so we end up with
29434 different masks depending on our target endian-ness. The diagram
29435 below may help. We must draw the distinction when building masks
29436 which select one half of the vector. An instruction selecting
29437 architectural low-lanes for a big-endian target, must be described using
29438 a mask selecting GCC high-lanes.
29440 Big-Endian Little-Endian
29442 GCC 0 1 2 3 3 2 1 0
29443 | x | x | x | x | | x | x | x | x |
29444 Architecture 3 2 1 0 3 2 1 0
29446 Low Mask: { 2, 3 } { 0, 1 }
29447 High Mask: { 0, 1 } { 2, 3 }
29451 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
29453 int nunits
= GET_MODE_NUNITS (mode
);
29454 rtvec v
= rtvec_alloc (nunits
/ 2);
29455 int high_base
= nunits
/ 2;
29461 if (BYTES_BIG_ENDIAN
)
29462 base
= high
? low_base
: high_base
;
29464 base
= high
? high_base
: low_base
;
29466 for (i
= 0; i
< nunits
/ 2; i
++)
29467 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
29469 t1
= gen_rtx_PARALLEL (mode
, v
);
29473 /* Check OP for validity as a PARALLEL RTX vector with elements
29474 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
29475 from the perspective of the architecture. See the diagram above
29476 arm_simd_vect_par_cnst_half_p for more details. */
29479 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
29482 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
29483 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
29484 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
29487 if (!VECTOR_MODE_P (mode
))
29490 if (count_op
!= count_ideal
)
29493 for (i
= 0; i
< count_ideal
; i
++)
29495 rtx elt_op
= XVECEXP (op
, 0, i
);
29496 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
29498 if (!CONST_INT_P (elt_op
)
29499 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
29505 /* Can output mi_thunk for all cases except for non-zero vcall_offset
29508 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
29511 /* For now, we punt and not handle this for TARGET_THUMB1. */
29512 if (vcall_offset
&& TARGET_THUMB1
)
29515 /* Otherwise ok. */
29519 /* Generate RTL for a conditional branch with rtx comparison CODE in
29520 mode CC_MODE. The destination of the unlikely conditional branch
29524 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
29528 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
29529 gen_rtx_REG (cc_mode
, CC_REGNUM
),
29532 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
29533 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
29535 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
29538 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
29540 For pure-code sections there is no letter code for this attribute, so
29541 output all the section flags numerically when this is needed. */
29544 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
29547 if (flags
& SECTION_ARM_PURECODE
)
29551 if (!(flags
& SECTION_DEBUG
))
29553 if (flags
& SECTION_EXCLUDE
)
29554 *num
|= 0x80000000;
29555 if (flags
& SECTION_WRITE
)
29557 if (flags
& SECTION_CODE
)
29559 if (flags
& SECTION_MERGE
)
29561 if (flags
& SECTION_STRINGS
)
29563 if (flags
& SECTION_TLS
)
29565 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
29574 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
29576 If pure-code is passed as an option, make sure all functions are in
29577 sections that have the SHF_ARM_PURECODE attribute. */
29580 arm_function_section (tree decl
, enum node_frequency freq
,
29581 bool startup
, bool exit
)
29583 const char * section_name
;
29586 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
29587 return default_function_section (decl
, freq
, startup
, exit
);
29589 if (!target_pure_code
)
29590 return default_function_section (decl
, freq
, startup
, exit
);
29593 section_name
= DECL_SECTION_NAME (decl
);
29595 /* If a function is not in a named section then it falls under the 'default'
29596 text section, also known as '.text'. We can preserve previous behavior as
29597 the default text section already has the SHF_ARM_PURECODE section
29601 section
*default_sec
= default_function_section (decl
, freq
, startup
,
29604 /* If default_sec is not null, then it must be a special section like for
29605 example .text.startup. We set the pure-code attribute and return the
29606 same section to preserve existing behavior. */
29608 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
29609 return default_sec
;
29612 /* Otherwise look whether a section has already been created with
29614 sec
= get_named_section (decl
, section_name
, 0);
29616 /* If that is not the case passing NULL as the section's name to
29617 'get_named_section' will create a section with the declaration's
29619 sec
= get_named_section (decl
, NULL
, 0);
29621 /* Set the SHF_ARM_PURECODE attribute. */
29622 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
29627 /* Implements the TARGET_SECTION_FLAGS hook.
29629 If DECL is a function declaration and pure-code is passed as an option
29630 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
29631 section's name and RELOC indicates whether the declarations initializer may
29632 contain runtime relocations. */
29634 static unsigned int
29635 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
29637 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
29639 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
29640 flags
|= SECTION_ARM_PURECODE
;
29645 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
29648 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
29650 rtx
*quot_p
, rtx
*rem_p
)
29652 if (mode
== SImode
)
29653 gcc_assert (!TARGET_IDIV
);
29655 machine_mode libval_mode
= smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode
),
29658 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
29660 op0
, GET_MODE (op0
),
29661 op1
, GET_MODE (op1
));
29663 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
29664 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
29665 GET_MODE_SIZE (mode
));
29667 gcc_assert (quotient
);
29668 gcc_assert (remainder
);
29670 *quot_p
= quotient
;
29671 *rem_p
= remainder
;
29674 #include "gt-arm.h"