1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
33 #include "stringpool.h"
39 #include "diagnostic-core.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
46 #include "insn-attr.h"
52 #include "sched-int.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
60 #include "target-globals.h"
62 #include "tm-constrs.h"
65 /* This file should be included last. */
66 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;   /* Node in a minipool (constant-pool) list.  */
typedef struct minipool_fixup   Mfix;    /* A pending minipool fixup record.  */

/* Hook for emitting language-specific object attributes; NULL when the
   active front end has none to emit.  NOTE(review): assumed to be set by
   the language front end — confirm against langhooks users.  */
void (*arm_lang_output_object_attributes_hook)(void);
79 /* Forward function declarations. */
80 static bool arm_const_not_ok_for_debug_p (rtx
);
81 static bool arm_needs_doubleword_align (machine_mode
, const_tree
);
82 static int arm_compute_static_chain_stack_bytes (void);
83 static arm_stack_offsets
*arm_get_frame_offsets (void);
84 static void arm_add_gc_roots (void);
85 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
86 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
87 static unsigned bit_count (unsigned long);
88 static unsigned feature_count (const arm_feature_set
*);
89 static int arm_address_register_rtx_p (rtx
, int);
90 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
91 static bool is_called_in_ARM_mode (tree
);
92 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
93 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
94 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
95 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
96 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
97 inline static int thumb1_index_register_rtx_p (rtx
, int);
98 static int thumb_far_jump_used_p (void);
99 static bool thumb_force_lr_save (void);
100 static unsigned arm_size_return_regs (void);
101 static bool arm_assemble_integer (rtx
, unsigned int, int);
102 static void arm_print_operand (FILE *, rtx
, int);
103 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
104 static bool arm_print_operand_punct_valid_p (unsigned char code
);
105 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
106 static arm_cc
get_arm_condition_code (rtx
);
107 static const char *output_multi_immediate (rtx
*, const char *, const char *,
109 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
110 static struct machine_function
*arm_init_machine_status (void);
111 static void thumb_exit (FILE *, int);
112 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
113 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
114 static Mnode
*add_minipool_forward_ref (Mfix
*);
115 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
116 static Mnode
*add_minipool_backward_ref (Mfix
*);
117 static void assign_minipool_offsets (Mfix
*);
118 static void arm_print_value (FILE *, rtx
);
119 static void dump_minipool (rtx_insn
*);
120 static int arm_barrier_cost (rtx_insn
*);
121 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
122 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
123 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
125 static void arm_reorg (void);
126 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
127 static unsigned long arm_compute_save_reg0_reg12_mask (void);
128 static unsigned long arm_compute_save_reg_mask (void);
129 static unsigned long arm_isr_value (tree
);
130 static unsigned long arm_compute_func_type (void);
131 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
132 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
133 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
134 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
135 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
137 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
138 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
139 static int arm_comp_type_attributes (const_tree
, const_tree
);
140 static void arm_set_default_type_attributes (tree
);
141 static int arm_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int);
142 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
143 static int optimal_immediate_sequence (enum rtx_code code
,
144 unsigned HOST_WIDE_INT val
,
145 struct four_ints
*return_sequence
);
146 static int optimal_immediate_sequence_1 (enum rtx_code code
,
147 unsigned HOST_WIDE_INT val
,
148 struct four_ints
*return_sequence
,
150 static int arm_get_strip_length (int);
151 static bool arm_function_ok_for_sibcall (tree
, tree
);
152 static machine_mode
arm_promote_function_mode (const_tree
,
155 static bool arm_return_in_memory (const_tree
, const_tree
);
156 static rtx
arm_function_value (const_tree
, const_tree
, bool);
157 static rtx
arm_libcall_value_1 (machine_mode
);
158 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
159 static bool arm_function_value_regno_p (const unsigned int);
160 static void arm_internal_label (FILE *, const char *, unsigned long);
161 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
163 static bool arm_have_conditional_execution (void);
164 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
165 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
166 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
167 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
168 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
169 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
170 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
171 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
172 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
173 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
174 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
175 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
176 static void emit_constant_insn (rtx cond
, rtx pattern
);
177 static rtx_insn
*emit_set_insn (rtx
, rtx
);
178 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
179 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
181 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
183 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
185 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
186 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
188 static rtx
aapcs_libcall_value (machine_mode
);
189 static int aapcs_select_return_coproc (const_tree
, const_tree
);
191 #ifdef OBJECT_FORMAT_ELF
192 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
193 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
196 static void arm_encode_section_info (tree
, rtx
, int);
199 static void arm_file_end (void);
200 static void arm_file_start (void);
201 static void arm_insert_attributes (tree
, tree
*);
203 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
205 static bool arm_pass_by_reference (cumulative_args_t
,
206 machine_mode
, const_tree
, bool);
207 static bool arm_promote_prototypes (const_tree
);
208 static bool arm_default_short_enums (void);
209 static bool arm_align_anon_bitfield (void);
210 static bool arm_return_in_msb (const_tree
);
211 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
212 static bool arm_return_in_memory (const_tree
, const_tree
);
214 static void arm_unwind_emit (FILE *, rtx_insn
*);
215 static bool arm_output_ttype (rtx
);
216 static void arm_asm_emit_except_personality (rtx
);
217 static void arm_asm_init_sections (void);
219 static rtx
arm_dwarf_register_span (rtx
);
221 static tree
arm_cxx_guard_type (void);
222 static bool arm_cxx_guard_mask_bit (void);
223 static tree
arm_get_cookie_size (tree
);
224 static bool arm_cookie_has_size (void);
225 static bool arm_cxx_cdtor_returns_this (void);
226 static bool arm_cxx_key_method_may_be_inline (void);
227 static void arm_cxx_determine_class_data_visibility (tree
);
228 static bool arm_cxx_class_data_always_comdat (void);
229 static bool arm_cxx_use_aeabi_atexit (void);
230 static void arm_init_libfuncs (void);
231 static tree
arm_build_builtin_va_list (void);
232 static void arm_expand_builtin_va_start (tree
, rtx
);
233 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
234 static void arm_option_override (void);
235 static void arm_override_options_after_change (void);
236 static void arm_option_print (FILE *, int, struct cl_target_option
*);
237 static void arm_set_current_function (tree
);
238 static bool arm_can_inline_p (tree
, tree
);
239 static void arm_relayout_function (tree
);
240 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
241 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
242 static bool arm_macro_fusion_p (void);
243 static bool arm_cannot_copy_insn_p (rtx_insn
*);
244 static int arm_issue_rate (void);
245 static int arm_first_cycle_multipass_dfa_lookahead (void);
246 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
247 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
248 static bool arm_output_addr_const_extra (FILE *, rtx
);
249 static bool arm_allocate_stack_slots_for_args (void);
250 static bool arm_warn_func_return (tree
);
251 static tree
arm_promoted_type (const_tree t
);
252 static tree
arm_convert_to_type (tree type
, tree expr
);
253 static bool arm_scalar_mode_supported_p (machine_mode
);
254 static bool arm_frame_pointer_required (void);
255 static bool arm_can_eliminate (const int, const int);
256 static void arm_asm_trampoline_template (FILE *);
257 static void arm_trampoline_init (rtx
, tree
, rtx
);
258 static rtx
arm_trampoline_adjust_address (rtx
);
259 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
260 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
261 static bool xscale_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
262 static bool fa726te_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
263 static bool arm_array_mode_supported_p (machine_mode
,
264 unsigned HOST_WIDE_INT
);
265 static machine_mode
arm_preferred_simd_mode (machine_mode
);
266 static bool arm_class_likely_spilled_p (reg_class_t
);
267 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
268 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
269 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
273 static void arm_conditional_register_usage (void);
274 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
275 static unsigned int arm_autovectorize_vector_sizes (void);
276 static int arm_default_branch_cost (bool, bool);
277 static int arm_cortex_a5_branch_cost (bool, bool);
278 static int arm_cortex_m_branch_cost (bool, bool);
279 static int arm_cortex_m7_branch_cost (bool, bool);
281 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
282 const unsigned char *sel
);
284 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
286 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
288 int misalign ATTRIBUTE_UNUSED
);
289 static unsigned arm_add_stmt_cost (void *data
, int count
,
290 enum vect_cost_for_stmt kind
,
291 struct _stmt_vec_info
*stmt_info
,
293 enum vect_cost_model_location where
);
295 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
296 bool op0_preserve_value
);
297 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
299 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
300 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
304 /* Table of machine attributes. */
305 static const struct attribute_spec arm_attribute_table
[] =
307 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
308 affects_type_identity } */
309 /* Function calls made to this symbol must be done indirectly, because
310 it may lie outside of the 26 bit addressing range of a normal function
312 { "long_call", 0, 0, false, true, true, NULL
, false },
313 /* Whereas these functions are always known to reside within the 26 bit
315 { "short_call", 0, 0, false, true, true, NULL
, false },
316 /* Specify the procedure call conventions for a function. */
317 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
319 /* Interrupt Service Routines have special prologue and epilogue requirements. */
320 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
322 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
324 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
327 /* ARM/PE has three new attributes:
329 dllexport - for exporting a function/variable that will live in a dll
330 dllimport - for importing a function/variable from a dll
332 Microsoft allows multiple declspecs in one __declspec, separating
333 them with spaces. We do NOT support this. Instead, use __declspec
336 { "dllimport", 0, 0, true, false, false, NULL
, false },
337 { "dllexport", 0, 0, true, false, false, NULL
, false },
338 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
340 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
343 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
346 { NULL
, 0, 0, false, false, false, NULL
, false }
349 /* Initialize the GCC target structure. */
350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
351 #undef TARGET_MERGE_DECL_ATTRIBUTES
352 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
355 #undef TARGET_LEGITIMIZE_ADDRESS
356 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
359 #define TARGET_LRA_P hook_bool_void_true
361 #undef TARGET_ATTRIBUTE_TABLE
362 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
364 #undef TARGET_INSERT_ATTRIBUTES
365 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
367 #undef TARGET_ASM_FILE_START
368 #define TARGET_ASM_FILE_START arm_file_start
369 #undef TARGET_ASM_FILE_END
370 #define TARGET_ASM_FILE_END arm_file_end
372 #undef TARGET_ASM_ALIGNED_SI_OP
373 #define TARGET_ASM_ALIGNED_SI_OP NULL
374 #undef TARGET_ASM_INTEGER
375 #define TARGET_ASM_INTEGER arm_assemble_integer
377 #undef TARGET_PRINT_OPERAND
378 #define TARGET_PRINT_OPERAND arm_print_operand
379 #undef TARGET_PRINT_OPERAND_ADDRESS
380 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
381 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
382 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
384 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
385 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
387 #undef TARGET_ASM_FUNCTION_PROLOGUE
388 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
390 #undef TARGET_ASM_FUNCTION_EPILOGUE
391 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
393 #undef TARGET_CAN_INLINE_P
394 #define TARGET_CAN_INLINE_P arm_can_inline_p
396 #undef TARGET_RELAYOUT_FUNCTION
397 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE arm_option_override
402 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
403 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
405 #undef TARGET_OPTION_PRINT
406 #define TARGET_OPTION_PRINT arm_option_print
408 #undef TARGET_COMP_TYPE_ATTRIBUTES
409 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
411 #undef TARGET_SCHED_MACRO_FUSION_P
412 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
414 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
415 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
417 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
418 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
420 #undef TARGET_SCHED_ADJUST_COST
421 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
423 #undef TARGET_SET_CURRENT_FUNCTION
424 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
426 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
427 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
429 #undef TARGET_SCHED_REORDER
430 #define TARGET_SCHED_REORDER arm_sched_reorder
432 #undef TARGET_REGISTER_MOVE_COST
433 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
435 #undef TARGET_MEMORY_MOVE_COST
436 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
438 #undef TARGET_ENCODE_SECTION_INFO
440 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
442 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
445 #undef TARGET_STRIP_NAME_ENCODING
446 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
448 #undef TARGET_ASM_INTERNAL_LABEL
449 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
451 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
452 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
454 #undef TARGET_FUNCTION_VALUE
455 #define TARGET_FUNCTION_VALUE arm_function_value
457 #undef TARGET_LIBCALL_VALUE
458 #define TARGET_LIBCALL_VALUE arm_libcall_value
460 #undef TARGET_FUNCTION_VALUE_REGNO_P
461 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
463 #undef TARGET_ASM_OUTPUT_MI_THUNK
464 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
465 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
466 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
468 #undef TARGET_RTX_COSTS
469 #define TARGET_RTX_COSTS arm_rtx_costs
470 #undef TARGET_ADDRESS_COST
471 #define TARGET_ADDRESS_COST arm_address_cost
473 #undef TARGET_SHIFT_TRUNCATION_MASK
474 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
475 #undef TARGET_VECTOR_MODE_SUPPORTED_P
476 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
477 #undef TARGET_ARRAY_MODE_SUPPORTED_P
478 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
479 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
480 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
481 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
482 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
483 arm_autovectorize_vector_sizes
485 #undef TARGET_MACHINE_DEPENDENT_REORG
486 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
488 #undef TARGET_INIT_BUILTINS
489 #define TARGET_INIT_BUILTINS arm_init_builtins
490 #undef TARGET_EXPAND_BUILTIN
491 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
492 #undef TARGET_BUILTIN_DECL
493 #define TARGET_BUILTIN_DECL arm_builtin_decl
495 #undef TARGET_INIT_LIBFUNCS
496 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
498 #undef TARGET_PROMOTE_FUNCTION_MODE
499 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
500 #undef TARGET_PROMOTE_PROTOTYPES
501 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
502 #undef TARGET_PASS_BY_REFERENCE
503 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
504 #undef TARGET_ARG_PARTIAL_BYTES
505 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
506 #undef TARGET_FUNCTION_ARG
507 #define TARGET_FUNCTION_ARG arm_function_arg
508 #undef TARGET_FUNCTION_ARG_ADVANCE
509 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
510 #undef TARGET_FUNCTION_ARG_BOUNDARY
511 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
513 #undef TARGET_SETUP_INCOMING_VARARGS
514 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
516 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
517 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
519 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
520 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
521 #undef TARGET_TRAMPOLINE_INIT
522 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
523 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
524 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
526 #undef TARGET_WARN_FUNC_RETURN
527 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
529 #undef TARGET_DEFAULT_SHORT_ENUMS
530 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
532 #undef TARGET_ALIGN_ANON_BITFIELD
533 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
535 #undef TARGET_NARROW_VOLATILE_BITFIELD
536 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
538 #undef TARGET_CXX_GUARD_TYPE
539 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
541 #undef TARGET_CXX_GUARD_MASK_BIT
542 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
544 #undef TARGET_CXX_GET_COOKIE_SIZE
545 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
547 #undef TARGET_CXX_COOKIE_HAS_SIZE
548 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
550 #undef TARGET_CXX_CDTOR_RETURNS_THIS
551 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
553 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
554 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
556 #undef TARGET_CXX_USE_AEABI_ATEXIT
557 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
559 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
560 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
561 arm_cxx_determine_class_data_visibility
563 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
564 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
566 #undef TARGET_RETURN_IN_MSB
567 #define TARGET_RETURN_IN_MSB arm_return_in_msb
569 #undef TARGET_RETURN_IN_MEMORY
570 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
572 #undef TARGET_MUST_PASS_IN_STACK
573 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
576 #undef TARGET_ASM_UNWIND_EMIT
577 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
579 /* EABI unwinding tables use a different format for the typeinfo tables. */
580 #undef TARGET_ASM_TTYPE
581 #define TARGET_ASM_TTYPE arm_output_ttype
583 #undef TARGET_ARM_EABI_UNWINDER
584 #define TARGET_ARM_EABI_UNWINDER true
586 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
587 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
589 #undef TARGET_ASM_INIT_SECTIONS
590 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
591 #endif /* ARM_UNWIND_INFO */
593 #undef TARGET_DWARF_REGISTER_SPAN
594 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
596 #undef TARGET_CANNOT_COPY_INSN_P
597 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
600 #undef TARGET_HAVE_TLS
601 #define TARGET_HAVE_TLS true
604 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
605 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
607 #undef TARGET_LEGITIMATE_CONSTANT_P
608 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
610 #undef TARGET_CANNOT_FORCE_CONST_MEM
611 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
613 #undef TARGET_MAX_ANCHOR_OFFSET
614 #define TARGET_MAX_ANCHOR_OFFSET 4095
616 /* The minimum is set such that the total size of the block
617 for a particular anchor is -4088 + 1 + 4095 bytes, which is
618 divisible by eight, ensuring natural spacing of anchors. */
619 #undef TARGET_MIN_ANCHOR_OFFSET
620 #define TARGET_MIN_ANCHOR_OFFSET -4088
622 #undef TARGET_SCHED_ISSUE_RATE
623 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
625 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
626 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
627 arm_first_cycle_multipass_dfa_lookahead
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
631 arm_first_cycle_multipass_dfa_lookahead_guard
633 #undef TARGET_MANGLE_TYPE
634 #define TARGET_MANGLE_TYPE arm_mangle_type
636 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
637 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
639 #undef TARGET_BUILD_BUILTIN_VA_LIST
640 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
641 #undef TARGET_EXPAND_BUILTIN_VA_START
642 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
643 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
644 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
647 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
648 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
651 #undef TARGET_LEGITIMATE_ADDRESS_P
652 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
654 #undef TARGET_PREFERRED_RELOAD_CLASS
655 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
657 #undef TARGET_PROMOTED_TYPE
658 #define TARGET_PROMOTED_TYPE arm_promoted_type
660 #undef TARGET_CONVERT_TO_TYPE
661 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
663 #undef TARGET_SCALAR_MODE_SUPPORTED_P
664 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
666 #undef TARGET_FRAME_POINTER_REQUIRED
667 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
669 #undef TARGET_CAN_ELIMINATE
670 #define TARGET_CAN_ELIMINATE arm_can_eliminate
672 #undef TARGET_CONDITIONAL_REGISTER_USAGE
673 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
675 #undef TARGET_CLASS_LIKELY_SPILLED_P
676 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
678 #undef TARGET_VECTORIZE_BUILTINS
679 #define TARGET_VECTORIZE_BUILTINS
681 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
682 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
683 arm_builtin_vectorized_function
685 #undef TARGET_VECTOR_ALIGNMENT
686 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
688 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
689 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
690 arm_vector_alignment_reachable
692 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
693 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
694 arm_builtin_support_vector_misalignment
696 #undef TARGET_PREFERRED_RENAME_CLASS
697 #define TARGET_PREFERRED_RENAME_CLASS \
698 arm_preferred_rename_class
700 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
701 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
702 arm_vectorize_vec_perm_const_ok
704 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
705 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
706 arm_builtin_vectorization_cost
707 #undef TARGET_VECTORIZE_ADD_STMT_COST
708 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
710 #undef TARGET_CANONICALIZE_COMPARISON
711 #define TARGET_CANONICALIZE_COMPARISON \
712 arm_canonicalize_comparison
714 #undef TARGET_ASAN_SHADOW_OFFSET
715 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
717 #undef MAX_INSN_PER_IT_BLOCK
718 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
720 #undef TARGET_CAN_USE_DOLOOP_P
721 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
723 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
724 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
726 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
727 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
729 #undef TARGET_SCHED_FUSION_PRIORITY
730 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
/* The target hook vector for the ARM backend.  Every entry is either one
   of the TARGET_* overrides defined above or the default supplied by
   target-def.h via TARGET_INITIALIZER.  */
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
/* First object allocated on minipool_obstack; NOTE(review): presumably
   kept so the obstack can be freed back to its starting point — confirm
   against the arm_reorg/minipool code.  */
static char * minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

/* The stream the assembler output is written to (defined elsewhere).  */
extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;
/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;
753 /* Which floating point hardware to schedule for. */
756 /* Used for Thumb call_via trampolines. */
757 rtx thumb_call_via_label
[14];
758 static int thumb_call_reg_needed
;
760 /* The bits in this mask specify which
761 instructions we are allowed to generate. */
762 arm_feature_set insn_flags
= ARM_FSET_EMPTY
;
764 /* The bits in this mask specify which instruction scheduling options should
766 arm_feature_set tune_flags
= ARM_FSET_EMPTY
;
/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;
772 /* The following are used in the arm.md file as equivalents to bits
773 in the above two flag variables. */
775 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
778 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
781 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
784 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
787 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
790 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
793 /* Nonzero if this chip supports the ARM 6K extensions. */
796 /* Nonzero if this chip supports the ARM 6KZ extensions. */
799 /* Nonzero if instructions present in ARMv6-M can be used. */
802 /* Nonzero if this chip supports the ARM 7 extensions. */
/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
808 /* Nonzero if instructions present in ARMv7E-M can be used. */
811 /* Nonzero if instructions present in ARMv8 can be used. */
814 /* Nonzero if this chip supports the ARMv8.1 extensions. */
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
849 /* Nonzero if chip supports Thumb 1. */
852 /* Nonzero if chip supports Thumb 2. */
/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;     /* ...in ARM state.  */
int arm_arch_thumb_hwdiv;   /* ...in Thumb state.  */

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* The procedure-calling standard in effect.  NOTE(review): presumably
   the ABI default selected by option handling — confirm where it is
   initialised.  */
enum arm_pcs arm_pcs_default;
874 /* For an explanation of these variables, see final_prescan_insn below. */
876 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
877 enum arm_cond_code arm_current_cc
;
880 int arm_target_label
;
881 /* The number of conditionally executed insns, including the current insn. */
882 int arm_condexec_count
= 0;
883 /* A bitmask specifying the patterns for the IT block.
884 Zero means do not output an IT block before this insn. */
885 int arm_condexec_mask
= 0;
886 /* The number of bits used in arm_condexec_mask. */
887 int arm_condexec_masklen
= 0;
889 /* Nonzero if chip supports the ARMv8 CRC instructions. */
890 int arm_arch_crc
= 0;
892 /* Nonzero if the core has a very small, high-latency, multiply unit. */
893 int arm_m_profile_small_mul
= 0;
895 /* The condition codes of the ARM, and the inverse function. */
896 static const char * const arm_condition_codes
[] =
898 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
899 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
902 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
903 int arm_regs_in_sequence
[] =
905 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
/* Assembler mnemonic emitted for a logical shift left.  */
908 #define ARM_LSL_NAME "lsl"
/* Nonzero iff the two NUL-terminated strings compare equal.  */
909 #define streq(string1, string2) (strcmp (string1, string2) == 0)
/* Mask of registers usable as work registers under Thumb-2: the low
   byte of the register mask with the Thumb hard frame pointer, stack
   pointer, program counter and PIC offset table register bits
   cleared.  */
911 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
912 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
913 | (1 << PIC_OFFSET_TABLE_REGNUM)))
915 /* Initialization code. */
919 const char *const name
;
920 enum processor_type core
;
922 enum base_architecture base_arch
;
923 const arm_feature_set flags
;
924 const struct tune_params
*const tune
;
/* Prefetch tuning triple { num_slots, l1_size, l1_line_size } (field
   names taken from ARM_PREFETCH_BENEFICIAL below); zero slots with
   unknown (-1) cache geometry marks prefetching as not worthwhile on
   the tuned core.  NOTE(review): the continuation line(s) of
   ARM_PREFETCH_BENEFICIAL are missing from this chunk -- confirm its
   body against the complete source.  */
928 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
929 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
936 /* arm generic vectorizer costs. */
938 struct cpu_vec_costs arm_default_vec_cost
= {
939 1, /* scalar_stmt_cost. */
940 1, /* scalar load_cost. */
941 1, /* scalar_store_cost. */
942 1, /* vec_stmt_cost. */
943 1, /* vec_to_scalar_cost. */
944 1, /* scalar_to_vec_cost. */
945 1, /* vec_align_load_cost. */
946 1, /* vec_unalign_load_cost. */
947 1, /* vec_unalign_store_cost. */
948 1, /* vec_store_cost. */
949 3, /* cond_taken_branch_cost. */
950 1, /* cond_not_taken_branch_cost. */
953 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
954 #include "aarch-cost-tables.h"
958 const struct cpu_cost_table cortexa9_extra_costs
=
965 COSTS_N_INSNS (1), /* shift_reg. */
966 COSTS_N_INSNS (1), /* arith_shift. */
967 COSTS_N_INSNS (2), /* arith_shift_reg. */
969 COSTS_N_INSNS (1), /* log_shift_reg. */
970 COSTS_N_INSNS (1), /* extend. */
971 COSTS_N_INSNS (2), /* extend_arith. */
972 COSTS_N_INSNS (1), /* bfi. */
973 COSTS_N_INSNS (1), /* bfx. */
977 true /* non_exec_costs_exec. */
982 COSTS_N_INSNS (3), /* simple. */
983 COSTS_N_INSNS (3), /* flag_setting. */
984 COSTS_N_INSNS (2), /* extend. */
985 COSTS_N_INSNS (3), /* add. */
986 COSTS_N_INSNS (2), /* extend_add. */
987 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
991 0, /* simple (N/A). */
992 0, /* flag_setting (N/A). */
993 COSTS_N_INSNS (4), /* extend. */
995 COSTS_N_INSNS (4), /* extend_add. */
1001 COSTS_N_INSNS (2), /* load. */
1002 COSTS_N_INSNS (2), /* load_sign_extend. */
1003 COSTS_N_INSNS (2), /* ldrd. */
1004 COSTS_N_INSNS (2), /* ldm_1st. */
1005 1, /* ldm_regs_per_insn_1st. */
1006 2, /* ldm_regs_per_insn_subsequent. */
1007 COSTS_N_INSNS (5), /* loadf. */
1008 COSTS_N_INSNS (5), /* loadd. */
1009 COSTS_N_INSNS (1), /* load_unaligned. */
1010 COSTS_N_INSNS (2), /* store. */
1011 COSTS_N_INSNS (2), /* strd. */
1012 COSTS_N_INSNS (2), /* stm_1st. */
1013 1, /* stm_regs_per_insn_1st. */
1014 2, /* stm_regs_per_insn_subsequent. */
1015 COSTS_N_INSNS (1), /* storef. */
1016 COSTS_N_INSNS (1), /* stored. */
1017 COSTS_N_INSNS (1), /* store_unaligned. */
1018 COSTS_N_INSNS (1), /* loadv. */
1019 COSTS_N_INSNS (1) /* storev. */
1024 COSTS_N_INSNS (14), /* div. */
1025 COSTS_N_INSNS (4), /* mult. */
1026 COSTS_N_INSNS (7), /* mult_addsub. */
1027 COSTS_N_INSNS (30), /* fma. */
1028 COSTS_N_INSNS (3), /* addsub. */
1029 COSTS_N_INSNS (1), /* fpconst. */
1030 COSTS_N_INSNS (1), /* neg. */
1031 COSTS_N_INSNS (3), /* compare. */
1032 COSTS_N_INSNS (3), /* widen. */
1033 COSTS_N_INSNS (3), /* narrow. */
1034 COSTS_N_INSNS (3), /* toint. */
1035 COSTS_N_INSNS (3), /* fromint. */
1036 COSTS_N_INSNS (3) /* roundint. */
1040 COSTS_N_INSNS (24), /* div. */
1041 COSTS_N_INSNS (5), /* mult. */
1042 COSTS_N_INSNS (8), /* mult_addsub. */
1043 COSTS_N_INSNS (30), /* fma. */
1044 COSTS_N_INSNS (3), /* addsub. */
1045 COSTS_N_INSNS (1), /* fpconst. */
1046 COSTS_N_INSNS (1), /* neg. */
1047 COSTS_N_INSNS (3), /* compare. */
1048 COSTS_N_INSNS (3), /* widen. */
1049 COSTS_N_INSNS (3), /* narrow. */
1050 COSTS_N_INSNS (3), /* toint. */
1051 COSTS_N_INSNS (3), /* fromint. */
1052 COSTS_N_INSNS (3) /* roundint. */
1057 COSTS_N_INSNS (1) /* alu. */
1061 const struct cpu_cost_table cortexa8_extra_costs
=
1067 COSTS_N_INSNS (1), /* shift. */
1069 COSTS_N_INSNS (1), /* arith_shift. */
1070 0, /* arith_shift_reg. */
1071 COSTS_N_INSNS (1), /* log_shift. */
1072 0, /* log_shift_reg. */
1074 0, /* extend_arith. */
1080 true /* non_exec_costs_exec. */
1085 COSTS_N_INSNS (1), /* simple. */
1086 COSTS_N_INSNS (1), /* flag_setting. */
1087 COSTS_N_INSNS (1), /* extend. */
1088 COSTS_N_INSNS (1), /* add. */
1089 COSTS_N_INSNS (1), /* extend_add. */
1090 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1094 0, /* simple (N/A). */
1095 0, /* flag_setting (N/A). */
1096 COSTS_N_INSNS (2), /* extend. */
1098 COSTS_N_INSNS (2), /* extend_add. */
1104 COSTS_N_INSNS (1), /* load. */
1105 COSTS_N_INSNS (1), /* load_sign_extend. */
1106 COSTS_N_INSNS (1), /* ldrd. */
1107 COSTS_N_INSNS (1), /* ldm_1st. */
1108 1, /* ldm_regs_per_insn_1st. */
1109 2, /* ldm_regs_per_insn_subsequent. */
1110 COSTS_N_INSNS (1), /* loadf. */
1111 COSTS_N_INSNS (1), /* loadd. */
1112 COSTS_N_INSNS (1), /* load_unaligned. */
1113 COSTS_N_INSNS (1), /* store. */
1114 COSTS_N_INSNS (1), /* strd. */
1115 COSTS_N_INSNS (1), /* stm_1st. */
1116 1, /* stm_regs_per_insn_1st. */
1117 2, /* stm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (1), /* storef. */
1119 COSTS_N_INSNS (1), /* stored. */
1120 COSTS_N_INSNS (1), /* store_unaligned. */
1121 COSTS_N_INSNS (1), /* loadv. */
1122 COSTS_N_INSNS (1) /* storev. */
1127 COSTS_N_INSNS (36), /* div. */
1128 COSTS_N_INSNS (11), /* mult. */
1129 COSTS_N_INSNS (20), /* mult_addsub. */
1130 COSTS_N_INSNS (30), /* fma. */
1131 COSTS_N_INSNS (9), /* addsub. */
1132 COSTS_N_INSNS (3), /* fpconst. */
1133 COSTS_N_INSNS (3), /* neg. */
1134 COSTS_N_INSNS (6), /* compare. */
1135 COSTS_N_INSNS (4), /* widen. */
1136 COSTS_N_INSNS (4), /* narrow. */
1137 COSTS_N_INSNS (8), /* toint. */
1138 COSTS_N_INSNS (8), /* fromint. */
1139 COSTS_N_INSNS (8) /* roundint. */
1143 COSTS_N_INSNS (64), /* div. */
1144 COSTS_N_INSNS (16), /* mult. */
1145 COSTS_N_INSNS (25), /* mult_addsub. */
1146 COSTS_N_INSNS (30), /* fma. */
1147 COSTS_N_INSNS (9), /* addsub. */
1148 COSTS_N_INSNS (3), /* fpconst. */
1149 COSTS_N_INSNS (3), /* neg. */
1150 COSTS_N_INSNS (6), /* compare. */
1151 COSTS_N_INSNS (6), /* widen. */
1152 COSTS_N_INSNS (6), /* narrow. */
1153 COSTS_N_INSNS (8), /* toint. */
1154 COSTS_N_INSNS (8), /* fromint. */
1155 COSTS_N_INSNS (8) /* roundint. */
1160 COSTS_N_INSNS (1) /* alu. */
1164 const struct cpu_cost_table cortexa5_extra_costs
=
1170 COSTS_N_INSNS (1), /* shift. */
1171 COSTS_N_INSNS (1), /* shift_reg. */
1172 COSTS_N_INSNS (1), /* arith_shift. */
1173 COSTS_N_INSNS (1), /* arith_shift_reg. */
1174 COSTS_N_INSNS (1), /* log_shift. */
1175 COSTS_N_INSNS (1), /* log_shift_reg. */
1176 COSTS_N_INSNS (1), /* extend. */
1177 COSTS_N_INSNS (1), /* extend_arith. */
1178 COSTS_N_INSNS (1), /* bfi. */
1179 COSTS_N_INSNS (1), /* bfx. */
1180 COSTS_N_INSNS (1), /* clz. */
1181 COSTS_N_INSNS (1), /* rev. */
1183 true /* non_exec_costs_exec. */
1190 COSTS_N_INSNS (1), /* flag_setting. */
1191 COSTS_N_INSNS (1), /* extend. */
1192 COSTS_N_INSNS (1), /* add. */
1193 COSTS_N_INSNS (1), /* extend_add. */
1194 COSTS_N_INSNS (7) /* idiv. */
1198 0, /* simple (N/A). */
1199 0, /* flag_setting (N/A). */
1200 COSTS_N_INSNS (1), /* extend. */
1202 COSTS_N_INSNS (2), /* extend_add. */
1208 COSTS_N_INSNS (1), /* load. */
1209 COSTS_N_INSNS (1), /* load_sign_extend. */
1210 COSTS_N_INSNS (6), /* ldrd. */
1211 COSTS_N_INSNS (1), /* ldm_1st. */
1212 1, /* ldm_regs_per_insn_1st. */
1213 2, /* ldm_regs_per_insn_subsequent. */
1214 COSTS_N_INSNS (2), /* loadf. */
1215 COSTS_N_INSNS (4), /* loadd. */
1216 COSTS_N_INSNS (1), /* load_unaligned. */
1217 COSTS_N_INSNS (1), /* store. */
1218 COSTS_N_INSNS (3), /* strd. */
1219 COSTS_N_INSNS (1), /* stm_1st. */
1220 1, /* stm_regs_per_insn_1st. */
1221 2, /* stm_regs_per_insn_subsequent. */
1222 COSTS_N_INSNS (2), /* storef. */
1223 COSTS_N_INSNS (2), /* stored. */
1224 COSTS_N_INSNS (1), /* store_unaligned. */
1225 COSTS_N_INSNS (1), /* loadv. */
1226 COSTS_N_INSNS (1) /* storev. */
1231 COSTS_N_INSNS (15), /* div. */
1232 COSTS_N_INSNS (3), /* mult. */
1233 COSTS_N_INSNS (7), /* mult_addsub. */
1234 COSTS_N_INSNS (7), /* fma. */
1235 COSTS_N_INSNS (3), /* addsub. */
1236 COSTS_N_INSNS (3), /* fpconst. */
1237 COSTS_N_INSNS (3), /* neg. */
1238 COSTS_N_INSNS (3), /* compare. */
1239 COSTS_N_INSNS (3), /* widen. */
1240 COSTS_N_INSNS (3), /* narrow. */
1241 COSTS_N_INSNS (3), /* toint. */
1242 COSTS_N_INSNS (3), /* fromint. */
1243 COSTS_N_INSNS (3) /* roundint. */
1247 COSTS_N_INSNS (30), /* div. */
1248 COSTS_N_INSNS (6), /* mult. */
1249 COSTS_N_INSNS (10), /* mult_addsub. */
1250 COSTS_N_INSNS (7), /* fma. */
1251 COSTS_N_INSNS (3), /* addsub. */
1252 COSTS_N_INSNS (3), /* fpconst. */
1253 COSTS_N_INSNS (3), /* neg. */
1254 COSTS_N_INSNS (3), /* compare. */
1255 COSTS_N_INSNS (3), /* widen. */
1256 COSTS_N_INSNS (3), /* narrow. */
1257 COSTS_N_INSNS (3), /* toint. */
1258 COSTS_N_INSNS (3), /* fromint. */
1259 COSTS_N_INSNS (3) /* roundint. */
1264 COSTS_N_INSNS (1) /* alu. */
1269 const struct cpu_cost_table cortexa7_extra_costs
=
1275 COSTS_N_INSNS (1), /* shift. */
1276 COSTS_N_INSNS (1), /* shift_reg. */
1277 COSTS_N_INSNS (1), /* arith_shift. */
1278 COSTS_N_INSNS (1), /* arith_shift_reg. */
1279 COSTS_N_INSNS (1), /* log_shift. */
1280 COSTS_N_INSNS (1), /* log_shift_reg. */
1281 COSTS_N_INSNS (1), /* extend. */
1282 COSTS_N_INSNS (1), /* extend_arith. */
1283 COSTS_N_INSNS (1), /* bfi. */
1284 COSTS_N_INSNS (1), /* bfx. */
1285 COSTS_N_INSNS (1), /* clz. */
1286 COSTS_N_INSNS (1), /* rev. */
1288 true /* non_exec_costs_exec. */
1295 COSTS_N_INSNS (1), /* flag_setting. */
1296 COSTS_N_INSNS (1), /* extend. */
1297 COSTS_N_INSNS (1), /* add. */
1298 COSTS_N_INSNS (1), /* extend_add. */
1299 COSTS_N_INSNS (7) /* idiv. */
1303 0, /* simple (N/A). */
1304 0, /* flag_setting (N/A). */
1305 COSTS_N_INSNS (1), /* extend. */
1307 COSTS_N_INSNS (2), /* extend_add. */
1313 COSTS_N_INSNS (1), /* load. */
1314 COSTS_N_INSNS (1), /* load_sign_extend. */
1315 COSTS_N_INSNS (3), /* ldrd. */
1316 COSTS_N_INSNS (1), /* ldm_1st. */
1317 1, /* ldm_regs_per_insn_1st. */
1318 2, /* ldm_regs_per_insn_subsequent. */
1319 COSTS_N_INSNS (2), /* loadf. */
1320 COSTS_N_INSNS (2), /* loadd. */
1321 COSTS_N_INSNS (1), /* load_unaligned. */
1322 COSTS_N_INSNS (1), /* store. */
1323 COSTS_N_INSNS (3), /* strd. */
1324 COSTS_N_INSNS (1), /* stm_1st. */
1325 1, /* stm_regs_per_insn_1st. */
1326 2, /* stm_regs_per_insn_subsequent. */
1327 COSTS_N_INSNS (2), /* storef. */
1328 COSTS_N_INSNS (2), /* stored. */
1329 COSTS_N_INSNS (1), /* store_unaligned. */
1330 COSTS_N_INSNS (1), /* loadv. */
1331 COSTS_N_INSNS (1) /* storev. */
1336 COSTS_N_INSNS (15), /* div. */
1337 COSTS_N_INSNS (3), /* mult. */
1338 COSTS_N_INSNS (7), /* mult_addsub. */
1339 COSTS_N_INSNS (7), /* fma. */
1340 COSTS_N_INSNS (3), /* addsub. */
1341 COSTS_N_INSNS (3), /* fpconst. */
1342 COSTS_N_INSNS (3), /* neg. */
1343 COSTS_N_INSNS (3), /* compare. */
1344 COSTS_N_INSNS (3), /* widen. */
1345 COSTS_N_INSNS (3), /* narrow. */
1346 COSTS_N_INSNS (3), /* toint. */
1347 COSTS_N_INSNS (3), /* fromint. */
1348 COSTS_N_INSNS (3) /* roundint. */
1352 COSTS_N_INSNS (30), /* div. */
1353 COSTS_N_INSNS (6), /* mult. */
1354 COSTS_N_INSNS (10), /* mult_addsub. */
1355 COSTS_N_INSNS (7), /* fma. */
1356 COSTS_N_INSNS (3), /* addsub. */
1357 COSTS_N_INSNS (3), /* fpconst. */
1358 COSTS_N_INSNS (3), /* neg. */
1359 COSTS_N_INSNS (3), /* compare. */
1360 COSTS_N_INSNS (3), /* widen. */
1361 COSTS_N_INSNS (3), /* narrow. */
1362 COSTS_N_INSNS (3), /* toint. */
1363 COSTS_N_INSNS (3), /* fromint. */
1364 COSTS_N_INSNS (3) /* roundint. */
1369 COSTS_N_INSNS (1) /* alu. */
1373 const struct cpu_cost_table cortexa12_extra_costs
=
1380 COSTS_N_INSNS (1), /* shift_reg. */
1381 COSTS_N_INSNS (1), /* arith_shift. */
1382 COSTS_N_INSNS (1), /* arith_shift_reg. */
1383 COSTS_N_INSNS (1), /* log_shift. */
1384 COSTS_N_INSNS (1), /* log_shift_reg. */
1386 COSTS_N_INSNS (1), /* extend_arith. */
1388 COSTS_N_INSNS (1), /* bfx. */
1389 COSTS_N_INSNS (1), /* clz. */
1390 COSTS_N_INSNS (1), /* rev. */
1392 true /* non_exec_costs_exec. */
1397 COSTS_N_INSNS (2), /* simple. */
1398 COSTS_N_INSNS (3), /* flag_setting. */
1399 COSTS_N_INSNS (2), /* extend. */
1400 COSTS_N_INSNS (3), /* add. */
1401 COSTS_N_INSNS (2), /* extend_add. */
1402 COSTS_N_INSNS (18) /* idiv. */
1406 0, /* simple (N/A). */
1407 0, /* flag_setting (N/A). */
1408 COSTS_N_INSNS (3), /* extend. */
1410 COSTS_N_INSNS (3), /* extend_add. */
1416 COSTS_N_INSNS (3), /* load. */
1417 COSTS_N_INSNS (3), /* load_sign_extend. */
1418 COSTS_N_INSNS (3), /* ldrd. */
1419 COSTS_N_INSNS (3), /* ldm_1st. */
1420 1, /* ldm_regs_per_insn_1st. */
1421 2, /* ldm_regs_per_insn_subsequent. */
1422 COSTS_N_INSNS (3), /* loadf. */
1423 COSTS_N_INSNS (3), /* loadd. */
1424 0, /* load_unaligned. */
1428 1, /* stm_regs_per_insn_1st. */
1429 2, /* stm_regs_per_insn_subsequent. */
1430 COSTS_N_INSNS (2), /* storef. */
1431 COSTS_N_INSNS (2), /* stored. */
1432 0, /* store_unaligned. */
1433 COSTS_N_INSNS (1), /* loadv. */
1434 COSTS_N_INSNS (1) /* storev. */
1439 COSTS_N_INSNS (17), /* div. */
1440 COSTS_N_INSNS (4), /* mult. */
1441 COSTS_N_INSNS (8), /* mult_addsub. */
1442 COSTS_N_INSNS (8), /* fma. */
1443 COSTS_N_INSNS (4), /* addsub. */
1444 COSTS_N_INSNS (2), /* fpconst. */
1445 COSTS_N_INSNS (2), /* neg. */
1446 COSTS_N_INSNS (2), /* compare. */
1447 COSTS_N_INSNS (4), /* widen. */
1448 COSTS_N_INSNS (4), /* narrow. */
1449 COSTS_N_INSNS (4), /* toint. */
1450 COSTS_N_INSNS (4), /* fromint. */
1451 COSTS_N_INSNS (4) /* roundint. */
1455 COSTS_N_INSNS (31), /* div. */
1456 COSTS_N_INSNS (4), /* mult. */
1457 COSTS_N_INSNS (8), /* mult_addsub. */
1458 COSTS_N_INSNS (8), /* fma. */
1459 COSTS_N_INSNS (4), /* addsub. */
1460 COSTS_N_INSNS (2), /* fpconst. */
1461 COSTS_N_INSNS (2), /* neg. */
1462 COSTS_N_INSNS (2), /* compare. */
1463 COSTS_N_INSNS (4), /* widen. */
1464 COSTS_N_INSNS (4), /* narrow. */
1465 COSTS_N_INSNS (4), /* toint. */
1466 COSTS_N_INSNS (4), /* fromint. */
1467 COSTS_N_INSNS (4) /* roundint. */
1472 COSTS_N_INSNS (1) /* alu. */
1476 const struct cpu_cost_table cortexa15_extra_costs
=
1484 COSTS_N_INSNS (1), /* arith_shift. */
1485 COSTS_N_INSNS (1), /* arith_shift_reg. */
1486 COSTS_N_INSNS (1), /* log_shift. */
1487 COSTS_N_INSNS (1), /* log_shift_reg. */
1489 COSTS_N_INSNS (1), /* extend_arith. */
1490 COSTS_N_INSNS (1), /* bfi. */
1495 true /* non_exec_costs_exec. */
1500 COSTS_N_INSNS (2), /* simple. */
1501 COSTS_N_INSNS (3), /* flag_setting. */
1502 COSTS_N_INSNS (2), /* extend. */
1503 COSTS_N_INSNS (2), /* add. */
1504 COSTS_N_INSNS (2), /* extend_add. */
1505 COSTS_N_INSNS (18) /* idiv. */
1509 0, /* simple (N/A). */
1510 0, /* flag_setting (N/A). */
1511 COSTS_N_INSNS (3), /* extend. */
1513 COSTS_N_INSNS (3), /* extend_add. */
1519 COSTS_N_INSNS (3), /* load. */
1520 COSTS_N_INSNS (3), /* load_sign_extend. */
1521 COSTS_N_INSNS (3), /* ldrd. */
1522 COSTS_N_INSNS (4), /* ldm_1st. */
1523 1, /* ldm_regs_per_insn_1st. */
1524 2, /* ldm_regs_per_insn_subsequent. */
1525 COSTS_N_INSNS (4), /* loadf. */
1526 COSTS_N_INSNS (4), /* loadd. */
1527 0, /* load_unaligned. */
1530 COSTS_N_INSNS (1), /* stm_1st. */
1531 1, /* stm_regs_per_insn_1st. */
1532 2, /* stm_regs_per_insn_subsequent. */
1535 0, /* store_unaligned. */
1536 COSTS_N_INSNS (1), /* loadv. */
1537 COSTS_N_INSNS (1) /* storev. */
1542 COSTS_N_INSNS (17), /* div. */
1543 COSTS_N_INSNS (4), /* mult. */
1544 COSTS_N_INSNS (8), /* mult_addsub. */
1545 COSTS_N_INSNS (8), /* fma. */
1546 COSTS_N_INSNS (4), /* addsub. */
1547 COSTS_N_INSNS (2), /* fpconst. */
1548 COSTS_N_INSNS (2), /* neg. */
1549 COSTS_N_INSNS (5), /* compare. */
1550 COSTS_N_INSNS (4), /* widen. */
1551 COSTS_N_INSNS (4), /* narrow. */
1552 COSTS_N_INSNS (4), /* toint. */
1553 COSTS_N_INSNS (4), /* fromint. */
1554 COSTS_N_INSNS (4) /* roundint. */
1558 COSTS_N_INSNS (31), /* div. */
1559 COSTS_N_INSNS (4), /* mult. */
1560 COSTS_N_INSNS (8), /* mult_addsub. */
1561 COSTS_N_INSNS (8), /* fma. */
1562 COSTS_N_INSNS (4), /* addsub. */
1563 COSTS_N_INSNS (2), /* fpconst. */
1564 COSTS_N_INSNS (2), /* neg. */
1565 COSTS_N_INSNS (2), /* compare. */
1566 COSTS_N_INSNS (4), /* widen. */
1567 COSTS_N_INSNS (4), /* narrow. */
1568 COSTS_N_INSNS (4), /* toint. */
1569 COSTS_N_INSNS (4), /* fromint. */
1570 COSTS_N_INSNS (4) /* roundint. */
1575 COSTS_N_INSNS (1) /* alu. */
1579 const struct cpu_cost_table v7m_extra_costs
=
1587 0, /* arith_shift. */
1588 COSTS_N_INSNS (1), /* arith_shift_reg. */
1590 COSTS_N_INSNS (1), /* log_shift_reg. */
1592 COSTS_N_INSNS (1), /* extend_arith. */
1597 COSTS_N_INSNS (1), /* non_exec. */
1598 false /* non_exec_costs_exec. */
1603 COSTS_N_INSNS (1), /* simple. */
1604 COSTS_N_INSNS (1), /* flag_setting. */
1605 COSTS_N_INSNS (2), /* extend. */
1606 COSTS_N_INSNS (1), /* add. */
1607 COSTS_N_INSNS (3), /* extend_add. */
1608 COSTS_N_INSNS (8) /* idiv. */
1612 0, /* simple (N/A). */
1613 0, /* flag_setting (N/A). */
1614 COSTS_N_INSNS (2), /* extend. */
1616 COSTS_N_INSNS (3), /* extend_add. */
1622 COSTS_N_INSNS (2), /* load. */
1623 0, /* load_sign_extend. */
1624 COSTS_N_INSNS (3), /* ldrd. */
1625 COSTS_N_INSNS (2), /* ldm_1st. */
1626 1, /* ldm_regs_per_insn_1st. */
1627 1, /* ldm_regs_per_insn_subsequent. */
1628 COSTS_N_INSNS (2), /* loadf. */
1629 COSTS_N_INSNS (3), /* loadd. */
1630 COSTS_N_INSNS (1), /* load_unaligned. */
1631 COSTS_N_INSNS (2), /* store. */
1632 COSTS_N_INSNS (3), /* strd. */
1633 COSTS_N_INSNS (2), /* stm_1st. */
1634 1, /* stm_regs_per_insn_1st. */
1635 1, /* stm_regs_per_insn_subsequent. */
1636 COSTS_N_INSNS (2), /* storef. */
1637 COSTS_N_INSNS (3), /* stored. */
1638 COSTS_N_INSNS (1), /* store_unaligned. */
1639 COSTS_N_INSNS (1), /* loadv. */
1640 COSTS_N_INSNS (1) /* storev. */
1645 COSTS_N_INSNS (7), /* div. */
1646 COSTS_N_INSNS (2), /* mult. */
1647 COSTS_N_INSNS (5), /* mult_addsub. */
1648 COSTS_N_INSNS (3), /* fma. */
1649 COSTS_N_INSNS (1), /* addsub. */
1661 COSTS_N_INSNS (15), /* div. */
1662 COSTS_N_INSNS (5), /* mult. */
1663 COSTS_N_INSNS (7), /* mult_addsub. */
1664 COSTS_N_INSNS (7), /* fma. */
1665 COSTS_N_INSNS (3), /* addsub. */
1678 COSTS_N_INSNS (1) /* alu. */
1682 const struct tune_params arm_slowmul_tune
=
1684 arm_slowmul_rtx_costs
,
1685 NULL
, /* Insn extra costs. */
1686 NULL
, /* Sched adj cost. */
1687 arm_default_branch_cost
,
1688 &arm_default_vec_cost
,
1689 3, /* Constant limit. */
1690 5, /* Max cond insns. */
1691 8, /* Memset max inline. */
1692 1, /* Issue rate. */
1693 ARM_PREFETCH_NOT_BENEFICIAL
,
1694 tune_params::PREF_CONST_POOL_TRUE
,
1695 tune_params::PREF_LDRD_FALSE
,
1696 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1697 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1698 tune_params::DISPARAGE_FLAGS_NEITHER
,
1699 tune_params::PREF_NEON_64_FALSE
,
1700 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1701 tune_params::FUSE_NOTHING
,
1702 tune_params::SCHED_AUTOPREF_OFF
1705 const struct tune_params arm_fastmul_tune
=
1707 arm_fastmul_rtx_costs
,
1708 NULL
, /* Insn extra costs. */
1709 NULL
, /* Sched adj cost. */
1710 arm_default_branch_cost
,
1711 &arm_default_vec_cost
,
1712 1, /* Constant limit. */
1713 5, /* Max cond insns. */
1714 8, /* Memset max inline. */
1715 1, /* Issue rate. */
1716 ARM_PREFETCH_NOT_BENEFICIAL
,
1717 tune_params::PREF_CONST_POOL_TRUE
,
1718 tune_params::PREF_LDRD_FALSE
,
1719 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1720 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1721 tune_params::DISPARAGE_FLAGS_NEITHER
,
1722 tune_params::PREF_NEON_64_FALSE
,
1723 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1724 tune_params::FUSE_NOTHING
,
1725 tune_params::SCHED_AUTOPREF_OFF
1728 /* StrongARM has early execution of branches, so a sequence that is worth
1729 skipping is shorter. Set max_insns_skipped to a lower value. */
1731 const struct tune_params arm_strongarm_tune
=
1733 arm_fastmul_rtx_costs
,
1734 NULL
, /* Insn extra costs. */
1735 NULL
, /* Sched adj cost. */
1736 arm_default_branch_cost
,
1737 &arm_default_vec_cost
,
1738 1, /* Constant limit. */
1739 3, /* Max cond insns. */
1740 8, /* Memset max inline. */
1741 1, /* Issue rate. */
1742 ARM_PREFETCH_NOT_BENEFICIAL
,
1743 tune_params::PREF_CONST_POOL_TRUE
,
1744 tune_params::PREF_LDRD_FALSE
,
1745 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1746 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1747 tune_params::DISPARAGE_FLAGS_NEITHER
,
1748 tune_params::PREF_NEON_64_FALSE
,
1749 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1750 tune_params::FUSE_NOTHING
,
1751 tune_params::SCHED_AUTOPREF_OFF
1754 const struct tune_params arm_xscale_tune
=
1756 arm_xscale_rtx_costs
,
1757 NULL
, /* Insn extra costs. */
1758 xscale_sched_adjust_cost
,
1759 arm_default_branch_cost
,
1760 &arm_default_vec_cost
,
1761 2, /* Constant limit. */
1762 3, /* Max cond insns. */
1763 8, /* Memset max inline. */
1764 1, /* Issue rate. */
1765 ARM_PREFETCH_NOT_BENEFICIAL
,
1766 tune_params::PREF_CONST_POOL_TRUE
,
1767 tune_params::PREF_LDRD_FALSE
,
1768 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1769 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1770 tune_params::DISPARAGE_FLAGS_NEITHER
,
1771 tune_params::PREF_NEON_64_FALSE
,
1772 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1773 tune_params::FUSE_NOTHING
,
1774 tune_params::SCHED_AUTOPREF_OFF
1777 const struct tune_params arm_9e_tune
=
1780 NULL
, /* Insn extra costs. */
1781 NULL
, /* Sched adj cost. */
1782 arm_default_branch_cost
,
1783 &arm_default_vec_cost
,
1784 1, /* Constant limit. */
1785 5, /* Max cond insns. */
1786 8, /* Memset max inline. */
1787 1, /* Issue rate. */
1788 ARM_PREFETCH_NOT_BENEFICIAL
,
1789 tune_params::PREF_CONST_POOL_TRUE
,
1790 tune_params::PREF_LDRD_FALSE
,
1791 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1792 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1793 tune_params::DISPARAGE_FLAGS_NEITHER
,
1794 tune_params::PREF_NEON_64_FALSE
,
1795 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1796 tune_params::FUSE_NOTHING
,
1797 tune_params::SCHED_AUTOPREF_OFF
1800 const struct tune_params arm_marvell_pj4_tune
=
1803 NULL
, /* Insn extra costs. */
1804 NULL
, /* Sched adj cost. */
1805 arm_default_branch_cost
,
1806 &arm_default_vec_cost
,
1807 1, /* Constant limit. */
1808 5, /* Max cond insns. */
1809 8, /* Memset max inline. */
1810 2, /* Issue rate. */
1811 ARM_PREFETCH_NOT_BENEFICIAL
,
1812 tune_params::PREF_CONST_POOL_TRUE
,
1813 tune_params::PREF_LDRD_FALSE
,
1814 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1815 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1816 tune_params::DISPARAGE_FLAGS_NEITHER
,
1817 tune_params::PREF_NEON_64_FALSE
,
1818 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1819 tune_params::FUSE_NOTHING
,
1820 tune_params::SCHED_AUTOPREF_OFF
1823 const struct tune_params arm_v6t2_tune
=
1826 NULL
, /* Insn extra costs. */
1827 NULL
, /* Sched adj cost. */
1828 arm_default_branch_cost
,
1829 &arm_default_vec_cost
,
1830 1, /* Constant limit. */
1831 5, /* Max cond insns. */
1832 8, /* Memset max inline. */
1833 1, /* Issue rate. */
1834 ARM_PREFETCH_NOT_BENEFICIAL
,
1835 tune_params::PREF_CONST_POOL_FALSE
,
1836 tune_params::PREF_LDRD_FALSE
,
1837 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1838 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1839 tune_params::DISPARAGE_FLAGS_NEITHER
,
1840 tune_params::PREF_NEON_64_FALSE
,
1841 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1842 tune_params::FUSE_NOTHING
,
1843 tune_params::SCHED_AUTOPREF_OFF
1847 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1848 const struct tune_params arm_cortex_tune
=
1851 &generic_extra_costs
,
1852 NULL
, /* Sched adj cost. */
1853 arm_default_branch_cost
,
1854 &arm_default_vec_cost
,
1855 1, /* Constant limit. */
1856 5, /* Max cond insns. */
1857 8, /* Memset max inline. */
1858 2, /* Issue rate. */
1859 ARM_PREFETCH_NOT_BENEFICIAL
,
1860 tune_params::PREF_CONST_POOL_FALSE
,
1861 tune_params::PREF_LDRD_FALSE
,
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1863 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1864 tune_params::DISPARAGE_FLAGS_NEITHER
,
1865 tune_params::PREF_NEON_64_FALSE
,
1866 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1867 tune_params::FUSE_NOTHING
,
1868 tune_params::SCHED_AUTOPREF_OFF
1871 const struct tune_params arm_cortex_a8_tune
=
1874 &cortexa8_extra_costs
,
1875 NULL
, /* Sched adj cost. */
1876 arm_default_branch_cost
,
1877 &arm_default_vec_cost
,
1878 1, /* Constant limit. */
1879 5, /* Max cond insns. */
1880 8, /* Memset max inline. */
1881 2, /* Issue rate. */
1882 ARM_PREFETCH_NOT_BENEFICIAL
,
1883 tune_params::PREF_CONST_POOL_FALSE
,
1884 tune_params::PREF_LDRD_FALSE
,
1885 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1886 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1887 tune_params::DISPARAGE_FLAGS_NEITHER
,
1888 tune_params::PREF_NEON_64_FALSE
,
1889 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1890 tune_params::FUSE_NOTHING
,
1891 tune_params::SCHED_AUTOPREF_OFF
1894 const struct tune_params arm_cortex_a7_tune
=
1897 &cortexa7_extra_costs
,
1898 NULL
, /* Sched adj cost. */
1899 arm_default_branch_cost
,
1900 &arm_default_vec_cost
,
1901 1, /* Constant limit. */
1902 5, /* Max cond insns. */
1903 8, /* Memset max inline. */
1904 2, /* Issue rate. */
1905 ARM_PREFETCH_NOT_BENEFICIAL
,
1906 tune_params::PREF_CONST_POOL_FALSE
,
1907 tune_params::PREF_LDRD_FALSE
,
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1909 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1910 tune_params::DISPARAGE_FLAGS_NEITHER
,
1911 tune_params::PREF_NEON_64_FALSE
,
1912 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1913 tune_params::FUSE_NOTHING
,
1914 tune_params::SCHED_AUTOPREF_OFF
1917 const struct tune_params arm_cortex_a15_tune
=
1920 &cortexa15_extra_costs
,
1921 NULL
, /* Sched adj cost. */
1922 arm_default_branch_cost
,
1923 &arm_default_vec_cost
,
1924 1, /* Constant limit. */
1925 2, /* Max cond insns. */
1926 8, /* Memset max inline. */
1927 3, /* Issue rate. */
1928 ARM_PREFETCH_NOT_BENEFICIAL
,
1929 tune_params::PREF_CONST_POOL_FALSE
,
1930 tune_params::PREF_LDRD_TRUE
,
1931 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1932 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1933 tune_params::DISPARAGE_FLAGS_ALL
,
1934 tune_params::PREF_NEON_64_FALSE
,
1935 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1936 tune_params::FUSE_NOTHING
,
1937 tune_params::SCHED_AUTOPREF_FULL
1940 const struct tune_params arm_cortex_a35_tune
=
1943 &cortexa53_extra_costs
,
1944 NULL
, /* Sched adj cost. */
1945 arm_default_branch_cost
,
1946 &arm_default_vec_cost
,
1947 1, /* Constant limit. */
1948 5, /* Max cond insns. */
1949 8, /* Memset max inline. */
1950 1, /* Issue rate. */
1951 ARM_PREFETCH_NOT_BENEFICIAL
,
1952 tune_params::PREF_CONST_POOL_FALSE
,
1953 tune_params::PREF_LDRD_FALSE
,
1954 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1955 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1956 tune_params::DISPARAGE_FLAGS_NEITHER
,
1957 tune_params::PREF_NEON_64_FALSE
,
1958 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1959 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
1960 tune_params::SCHED_AUTOPREF_OFF
1963 const struct tune_params arm_cortex_a53_tune
=
1966 &cortexa53_extra_costs
,
1967 NULL
, /* Sched adj cost. */
1968 arm_default_branch_cost
,
1969 &arm_default_vec_cost
,
1970 1, /* Constant limit. */
1971 5, /* Max cond insns. */
1972 8, /* Memset max inline. */
1973 2, /* Issue rate. */
1974 ARM_PREFETCH_NOT_BENEFICIAL
,
1975 tune_params::PREF_CONST_POOL_FALSE
,
1976 tune_params::PREF_LDRD_FALSE
,
1977 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1979 tune_params::DISPARAGE_FLAGS_NEITHER
,
1980 tune_params::PREF_NEON_64_FALSE
,
1981 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1982 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
1983 tune_params::SCHED_AUTOPREF_OFF
1986 const struct tune_params arm_cortex_a57_tune
=
1989 &cortexa57_extra_costs
,
1990 NULL
, /* Sched adj cost. */
1991 arm_default_branch_cost
,
1992 &arm_default_vec_cost
,
1993 1, /* Constant limit. */
1994 2, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 3, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL
,
1998 tune_params::PREF_CONST_POOL_FALSE
,
1999 tune_params::PREF_LDRD_TRUE
,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_ALL
,
2003 tune_params::PREF_NEON_64_FALSE
,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2005 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2006 tune_params::SCHED_AUTOPREF_FULL
2009 const struct tune_params arm_exynosm1_tune
=
2012 &exynosm1_extra_costs
,
2013 NULL
, /* Sched adj cost. */
2014 arm_default_branch_cost
,
2015 &arm_default_vec_cost
,
2016 1, /* Constant limit. */
2017 2, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 3, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL
,
2021 tune_params::PREF_CONST_POOL_FALSE
,
2022 tune_params::PREF_LDRD_TRUE
,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_ALL
,
2026 tune_params::PREF_NEON_64_FALSE
,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2028 tune_params::FUSE_NOTHING
,
2029 tune_params::SCHED_AUTOPREF_OFF
2032 const struct tune_params arm_xgene1_tune
=
2035 &xgene1_extra_costs
,
2036 NULL
, /* Sched adj cost. */
2037 arm_default_branch_cost
,
2038 &arm_default_vec_cost
,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 32, /* Memset max inline. */
2042 4, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL
,
2044 tune_params::PREF_CONST_POOL_FALSE
,
2045 tune_params::PREF_LDRD_TRUE
,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL
,
2049 tune_params::PREF_NEON_64_FALSE
,
2050 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2051 tune_params::FUSE_NOTHING
,
2052 tune_params::SCHED_AUTOPREF_OFF
2055 const struct tune_params arm_qdf24xx_tune
=
2058 &qdf24xx_extra_costs
,
2059 NULL
, /* Scheduler cost adjustment. */
2060 arm_default_branch_cost
,
2061 &arm_default_vec_cost
, /* Vectorizer costs. */
2062 1, /* Constant limit. */
2063 2, /* Max cond insns. */
2064 8, /* Memset max inline. */
2065 4, /* Issue rate. */
2066 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2067 tune_params::PREF_CONST_POOL_FALSE
,
2068 tune_params::PREF_LDRD_TRUE
,
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2071 tune_params::DISPARAGE_FLAGS_ALL
,
2072 tune_params::PREF_NEON_64_FALSE
,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2074 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2075 tune_params::SCHED_AUTOPREF_FULL
2078 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2079 less appealing. Set max_insns_skipped to a low value. */
2081 const struct tune_params arm_cortex_a5_tune
=
2084 &cortexa5_extra_costs
,
2085 NULL
, /* Sched adj cost. */
2086 arm_cortex_a5_branch_cost
,
2087 &arm_default_vec_cost
,
2088 1, /* Constant limit. */
2089 1, /* Max cond insns. */
2090 8, /* Memset max inline. */
2091 2, /* Issue rate. */
2092 ARM_PREFETCH_NOT_BENEFICIAL
,
2093 tune_params::PREF_CONST_POOL_FALSE
,
2094 tune_params::PREF_LDRD_FALSE
,
2095 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2096 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2097 tune_params::DISPARAGE_FLAGS_NEITHER
,
2098 tune_params::PREF_NEON_64_FALSE
,
2099 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2100 tune_params::FUSE_NOTHING
,
2101 tune_params::SCHED_AUTOPREF_OFF
2104 const struct tune_params arm_cortex_a9_tune
=
2107 &cortexa9_extra_costs
,
2108 cortex_a9_sched_adjust_cost
,
2109 arm_default_branch_cost
,
2110 &arm_default_vec_cost
,
2111 1, /* Constant limit. */
2112 5, /* Max cond insns. */
2113 8, /* Memset max inline. */
2114 2, /* Issue rate. */
2115 ARM_PREFETCH_BENEFICIAL(4,32,32),
2116 tune_params::PREF_CONST_POOL_FALSE
,
2117 tune_params::PREF_LDRD_FALSE
,
2118 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2119 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2120 tune_params::DISPARAGE_FLAGS_NEITHER
,
2121 tune_params::PREF_NEON_64_FALSE
,
2122 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2123 tune_params::FUSE_NOTHING
,
2124 tune_params::SCHED_AUTOPREF_OFF
2127 const struct tune_params arm_cortex_a12_tune
=
2130 &cortexa12_extra_costs
,
2131 NULL
, /* Sched adj cost. */
2132 arm_default_branch_cost
,
2133 &arm_default_vec_cost
, /* Vectorizer costs. */
2134 1, /* Constant limit. */
2135 2, /* Max cond insns. */
2136 8, /* Memset max inline. */
2137 2, /* Issue rate. */
2138 ARM_PREFETCH_NOT_BENEFICIAL
,
2139 tune_params::PREF_CONST_POOL_FALSE
,
2140 tune_params::PREF_LDRD_TRUE
,
2141 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2142 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2143 tune_params::DISPARAGE_FLAGS_ALL
,
2144 tune_params::PREF_NEON_64_FALSE
,
2145 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2146 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2147 tune_params::SCHED_AUTOPREF_OFF
2150 const struct tune_params arm_cortex_a73_tune
=
2153 &cortexa57_extra_costs
,
2154 NULL
, /* Sched adj cost. */
2155 arm_default_branch_cost
,
2156 &arm_default_vec_cost
, /* Vectorizer costs. */
2157 1, /* Constant limit. */
2158 2, /* Max cond insns. */
2159 8, /* Memset max inline. */
2160 2, /* Issue rate. */
2161 ARM_PREFETCH_NOT_BENEFICIAL
,
2162 tune_params::PREF_CONST_POOL_FALSE
,
2163 tune_params::PREF_LDRD_TRUE
,
2164 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2165 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2166 tune_params::DISPARAGE_FLAGS_ALL
,
2167 tune_params::PREF_NEON_64_FALSE
,
2168 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2169 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2170 tune_params::SCHED_AUTOPREF_FULL
2173 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2174 cycle to execute each. An LDR from the constant pool also takes two cycles
2175 to execute, but mildly increases pipelining opportunity (consecutive
2176 loads/stores can be pipelined together, saving one cycle), and may also
2177 improve icache utilisation. Hence we prefer the constant pool for such
2180 const struct tune_params arm_v7m_tune
=
2184 NULL
, /* Sched adj cost. */
2185 arm_cortex_m_branch_cost
,
2186 &arm_default_vec_cost
,
2187 1, /* Constant limit. */
2188 2, /* Max cond insns. */
2189 8, /* Memset max inline. */
2190 1, /* Issue rate. */
2191 ARM_PREFETCH_NOT_BENEFICIAL
,
2192 tune_params::PREF_CONST_POOL_TRUE
,
2193 tune_params::PREF_LDRD_FALSE
,
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2195 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2196 tune_params::DISPARAGE_FLAGS_NEITHER
,
2197 tune_params::PREF_NEON_64_FALSE
,
2198 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2199 tune_params::FUSE_NOTHING
,
2200 tune_params::SCHED_AUTOPREF_OFF
2203 /* Cortex-M7 tuning. */
2205 const struct tune_params arm_cortex_m7_tune
=
2209 NULL
, /* Sched adj cost. */
2210 arm_cortex_m7_branch_cost
,
2211 &arm_default_vec_cost
,
2212 0, /* Constant limit. */
2213 1, /* Max cond insns. */
2214 8, /* Memset max inline. */
2215 2, /* Issue rate. */
2216 ARM_PREFETCH_NOT_BENEFICIAL
,
2217 tune_params::PREF_CONST_POOL_TRUE
,
2218 tune_params::PREF_LDRD_FALSE
,
2219 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2220 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2221 tune_params::DISPARAGE_FLAGS_NEITHER
,
2222 tune_params::PREF_NEON_64_FALSE
,
2223 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2224 tune_params::FUSE_NOTHING
,
2225 tune_params::SCHED_AUTOPREF_OFF
2228 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2229 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2230 const struct tune_params arm_v6m_tune
=
2233 NULL
, /* Insn extra costs. */
2234 NULL
, /* Sched adj cost. */
2235 arm_default_branch_cost
,
2236 &arm_default_vec_cost
, /* Vectorizer costs. */
2237 1, /* Constant limit. */
2238 5, /* Max cond insns. */
2239 8, /* Memset max inline. */
2240 1, /* Issue rate. */
2241 ARM_PREFETCH_NOT_BENEFICIAL
,
2242 tune_params::PREF_CONST_POOL_FALSE
,
2243 tune_params::PREF_LDRD_FALSE
,
2244 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2245 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2246 tune_params::DISPARAGE_FLAGS_NEITHER
,
2247 tune_params::PREF_NEON_64_FALSE
,
2248 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2249 tune_params::FUSE_NOTHING
,
2250 tune_params::SCHED_AUTOPREF_OFF
2253 const struct tune_params arm_fa726te_tune
=
2256 NULL
, /* Insn extra costs. */
2257 fa726te_sched_adjust_cost
,
2258 arm_default_branch_cost
,
2259 &arm_default_vec_cost
,
2260 1, /* Constant limit. */
2261 5, /* Max cond insns. */
2262 8, /* Memset max inline. */
2263 2, /* Issue rate. */
2264 ARM_PREFETCH_NOT_BENEFICIAL
,
2265 tune_params::PREF_CONST_POOL_TRUE
,
2266 tune_params::PREF_LDRD_FALSE
,
2267 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2268 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2269 tune_params::DISPARAGE_FLAGS_NEITHER
,
2270 tune_params::PREF_NEON_64_FALSE
,
2271 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2272 tune_params::FUSE_NOTHING
,
2273 tune_params::SCHED_AUTOPREF_OFF
2277 /* Not all of these give usefully different compilation alternatives,
2278 but there is no simple way of generalizing them. */
2279 static const struct processors all_cores
[] =
2282 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2283 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2284 FLAGS, &arm_##COSTS##_tune},
2285 #include "arm-cores.def"
2287 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, ARM_FSET_EMPTY
, NULL
}
2290 static const struct processors all_architectures
[] =
2292 /* ARM Architectures */
2293 /* We don't specify tuning costs here as it will be figured out
2296 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2297 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2298 #include "arm-arches.def"
2300 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, ARM_FSET_EMPTY
, NULL
}
2304 /* These are populated as commandline arguments are processed, or NULL
2305 if not specified. */
2306 static const struct processors
*arm_selected_arch
;
2307 static const struct processors
*arm_selected_cpu
;
2308 static const struct processors
*arm_selected_tune
;
2310 /* The name of the preprocessor macro to define for this architecture. PROFILE
2311 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2312 is thus chosen to be big enough to hold the longest architecture name. */
2314 char arm_arch_name
[] = "__ARM_ARCH_PROFILE__";
2316 /* Available values for -mfpu=. */
2318 const struct arm_fpu_desc all_fpus
[] =
2320 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \
2321 { NAME, MODEL, REV, VFP_REGS, FEATURES },
2322 #include "arm-fpus.def"
2326 /* Supported TLS relocations. */
2334 TLS_DESCSEQ
/* GNU scheme */
2337 /* The maximum number of insns to be used when loading a constant. */
2339 arm_constant_limit (bool size_p
)
2341 return size_p
? 1 : current_tune
->constant_limit
;
2344 /* Emit an insn that's a simple single-set. Both the operands must be known
2346 inline static rtx_insn
*
2347 emit_set_insn (rtx x
, rtx y
)
2349 return emit_insn (gen_rtx_SET (x
, y
));
2352 /* Return the number of bits set in VALUE. */
2354 bit_count (unsigned long value
)
2356 unsigned long count
= 0;
2361 value
&= value
- 1; /* Clear the least-significant set bit. */
2367 /* Return the number of features in feature-set SET. */
2369 feature_count (const arm_feature_set
* set
)
2371 return (bit_count (ARM_FSET_CPU1 (*set
))
2372 + bit_count (ARM_FSET_CPU2 (*set
)));
2379 } arm_fixed_mode_set
;
2381 /* A small helper for setting fixed-point library libfuncs. */
2384 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2385 const char *funcname
, const char *modename
,
2390 if (num_suffix
== 0)
2391 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2393 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2395 set_optab_libfunc (optable
, mode
, buffer
);
2399 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2400 machine_mode from
, const char *funcname
,
2401 const char *toname
, const char *fromname
)
2404 const char *maybe_suffix_2
= "";
2406 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2407 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2408 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2409 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2410 maybe_suffix_2
= "2";
2412 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2415 set_conv_libfunc (optable
, to
, from
, buffer
);
2418 /* Set up library functions unique to ARM. */
2421 arm_init_libfuncs (void)
2423 /* For Linux, we have access to kernel support for atomic operations. */
2424 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2425 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2427 /* There are no special library functions unless we are using the
2432 /* The functions below are described in Section 4 of the "Run-Time
2433 ABI for the ARM architecture", Version 1.0. */
2435 /* Double-precision floating-point arithmetic. Table 2. */
2436 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2437 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2438 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2439 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2440 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2442 /* Double-precision comparisons. Table 3. */
2443 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2444 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2445 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2446 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2447 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2448 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2449 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2451 /* Single-precision floating-point arithmetic. Table 4. */
2452 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2453 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2454 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2455 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2456 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2458 /* Single-precision comparisons. Table 5. */
2459 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2460 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2461 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2462 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2463 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2464 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2465 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2467 /* Floating-point to integer conversions. Table 6. */
2468 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2469 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2470 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2471 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2472 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2473 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2474 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2475 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2477 /* Conversions between floating types. Table 7. */
2478 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2479 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2481 /* Integer to floating-point conversions. Table 8. */
2482 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2483 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2484 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2485 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2486 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2487 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2488 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2489 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2491 /* Long long. Table 9. */
2492 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2493 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2494 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2495 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2496 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2497 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2498 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2499 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2501 /* Integer (32/32->32) division. \S 4.3.1. */
2502 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2503 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2505 /* The divmod functions are designed so that they can be used for
2506 plain division, even though they return both the quotient and the
2507 remainder. The quotient is returned in the usual location (i.e.,
2508 r0 for SImode, {r0, r1} for DImode), just as would be expected
2509 for an ordinary division routine. Because the AAPCS calling
2510 conventions specify that all of { r0, r1, r2, r3 } are
2511 callee-saved registers, there is no need to tell the compiler
2512 explicitly that those registers are clobbered by these
2514 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2515 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2517 /* For SImode division the ABI provides div-without-mod routines,
2518 which are faster. */
2519 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2520 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2522 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2523 divmod libcalls instead. */
2524 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2525 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2526 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2527 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2529 /* Half-precision float operations. The compiler handles all operations
2530 with NULL libfuncs by converting the SFmode. */
2531 switch (arm_fp16_format
)
2533 case ARM_FP16_FORMAT_IEEE
:
2534 case ARM_FP16_FORMAT_ALTERNATIVE
:
2537 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2538 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2540 : "__gnu_f2h_alternative"));
2541 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2542 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2544 : "__gnu_h2f_alternative"));
2547 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2548 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2549 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2550 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2551 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2554 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2555 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2556 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2557 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2558 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2559 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2560 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2567 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2569 const arm_fixed_mode_set fixed_arith_modes
[] =
2590 const arm_fixed_mode_set fixed_conv_modes
[] =
2620 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2622 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2623 "add", fixed_arith_modes
[i
].name
, 3);
2624 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2625 "ssadd", fixed_arith_modes
[i
].name
, 3);
2626 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2627 "usadd", fixed_arith_modes
[i
].name
, 3);
2628 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2629 "sub", fixed_arith_modes
[i
].name
, 3);
2630 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2631 "sssub", fixed_arith_modes
[i
].name
, 3);
2632 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2633 "ussub", fixed_arith_modes
[i
].name
, 3);
2634 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2635 "mul", fixed_arith_modes
[i
].name
, 3);
2636 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2637 "ssmul", fixed_arith_modes
[i
].name
, 3);
2638 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2639 "usmul", fixed_arith_modes
[i
].name
, 3);
2640 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2641 "div", fixed_arith_modes
[i
].name
, 3);
2642 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2643 "udiv", fixed_arith_modes
[i
].name
, 3);
2644 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2645 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2646 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2647 "usdiv", fixed_arith_modes
[i
].name
, 3);
2648 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2649 "neg", fixed_arith_modes
[i
].name
, 2);
2650 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2651 "ssneg", fixed_arith_modes
[i
].name
, 2);
2652 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2653 "usneg", fixed_arith_modes
[i
].name
, 2);
2654 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2655 "ashl", fixed_arith_modes
[i
].name
, 3);
2656 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2657 "ashr", fixed_arith_modes
[i
].name
, 3);
2658 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2659 "lshr", fixed_arith_modes
[i
].name
, 3);
2660 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2661 "ssashl", fixed_arith_modes
[i
].name
, 3);
2662 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2663 "usashl", fixed_arith_modes
[i
].name
, 3);
2664 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2665 "cmp", fixed_arith_modes
[i
].name
, 2);
2668 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2669 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2672 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2673 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2676 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2677 fixed_conv_modes
[j
].mode
, "fract",
2678 fixed_conv_modes
[i
].name
,
2679 fixed_conv_modes
[j
].name
);
2680 arm_set_fixed_conv_libfunc (satfract_optab
,
2681 fixed_conv_modes
[i
].mode
,
2682 fixed_conv_modes
[j
].mode
, "satfract",
2683 fixed_conv_modes
[i
].name
,
2684 fixed_conv_modes
[j
].name
);
2685 arm_set_fixed_conv_libfunc (fractuns_optab
,
2686 fixed_conv_modes
[i
].mode
,
2687 fixed_conv_modes
[j
].mode
, "fractuns",
2688 fixed_conv_modes
[i
].name
,
2689 fixed_conv_modes
[j
].name
);
2690 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2691 fixed_conv_modes
[i
].mode
,
2692 fixed_conv_modes
[j
].mode
, "satfractuns",
2693 fixed_conv_modes
[i
].name
,
2694 fixed_conv_modes
[j
].name
);
2698 if (TARGET_AAPCS_BASED
)
2699 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2702 /* On AAPCS systems, this is the "struct __va_list". */
2703 static GTY(()) tree va_list_type
;
2705 /* Return the type to use as __builtin_va_list. */
2707 arm_build_builtin_va_list (void)
2712 if (!TARGET_AAPCS_BASED
)
2713 return std_build_builtin_va_list ();
2715 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2723 The C Library ABI further reinforces this definition in \S
2726 We must follow this definition exactly. The structure tag
2727 name is visible in C++ mangled names, and thus forms a part
2728 of the ABI. The field name may be used by people who
2729 #include <stdarg.h>. */
2730 /* Create the type. */
2731 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2732 /* Give it the required name. */
2733 va_list_name
= build_decl (BUILTINS_LOCATION
,
2735 get_identifier ("__va_list"),
2737 DECL_ARTIFICIAL (va_list_name
) = 1;
2738 TYPE_NAME (va_list_type
) = va_list_name
;
2739 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2740 /* Create the __ap field. */
2741 ap_field
= build_decl (BUILTINS_LOCATION
,
2743 get_identifier ("__ap"),
2745 DECL_ARTIFICIAL (ap_field
) = 1;
2746 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2747 TYPE_FIELDS (va_list_type
) = ap_field
;
2748 /* Compute its layout. */
2749 layout_type (va_list_type
);
2751 return va_list_type
;
2754 /* Return an expression of type "void *" pointing to the next
2755 available argument in a variable-argument list. VALIST is the
2756 user-level va_list object, of type __builtin_va_list. */
2758 arm_extract_valist_ptr (tree valist
)
2760 if (TREE_TYPE (valist
) == error_mark_node
)
2761 return error_mark_node
;
2763 /* On an AAPCS target, the pointer is stored within "struct
2765 if (TARGET_AAPCS_BASED
)
2767 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2768 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2769 valist
, ap_field
, NULL_TREE
);
2775 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2777 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2779 valist
= arm_extract_valist_ptr (valist
);
2780 std_expand_builtin_va_start (valist
, nextarg
);
2783 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2785 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2788 valist
= arm_extract_valist_ptr (valist
);
2789 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2792 /* Check any incompatible options that the user has specified. */
2794 arm_option_check_internal (struct gcc_options
*opts
)
2796 int flags
= opts
->x_target_flags
;
2797 const struct arm_fpu_desc
*fpu_desc
= &all_fpus
[opts
->x_arm_fpu_index
];
2799 /* iWMMXt and NEON are incompatible. */
2800 if (TARGET_IWMMXT
&& TARGET_VFP
2801 && ARM_FPU_FSET_HAS (fpu_desc
->features
, FPU_FL_NEON
))
2802 error ("iWMMXt and NEON are incompatible");
2804 /* Make sure that the processor choice does not conflict with any of the
2805 other command line choices. */
2806 if (TARGET_ARM_P (flags
) && !ARM_FSET_HAS_CPU1 (insn_flags
, FL_NOTM
))
2807 error ("target CPU does not support ARM mode");
2809 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2810 from here where no function is being compiled currently. */
2811 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2812 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2814 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2815 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2817 /* If this target is normally configured to use APCS frames, warn if they
2818 are turned off and debugging is turned on. */
2819 if (TARGET_ARM_P (flags
)
2820 && write_symbols
!= NO_DEBUG
2821 && !TARGET_APCS_FRAME
2822 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2823 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2825 /* iWMMXt unsupported under Thumb mode. */
2826 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2827 error ("iWMMXt unsupported under Thumb mode");
2829 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2830 error ("can not use -mtp=cp15 with 16-bit Thumb");
2832 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2834 error ("RTP PIC is incompatible with Thumb");
2838 /* We only support -mslow-flash-data on armv7-m targets. */
2839 if (target_slow_flash_data
2840 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2841 || (TARGET_THUMB1_P (flags
) || flag_pic
|| TARGET_NEON
)))
2842 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2845 /* Recompute the global settings depending on target attribute options. */
2848 arm_option_params_internal (void)
2850 /* If we are not using the default (ARM mode) section anchor offset
2851 ranges, then set the correct ranges now. */
2854 /* Thumb-1 LDR instructions cannot have negative offsets.
2855 Permissible positive offset ranges are 5-bit (for byte loads),
2856 6-bit (for halfword loads), or 7-bit (for word loads).
2857 Empirical results suggest a 7-bit anchor range gives the best
2858 overall code size. */
2859 targetm
.min_anchor_offset
= 0;
2860 targetm
.max_anchor_offset
= 127;
2862 else if (TARGET_THUMB2
)
2864 /* The minimum is set such that the total size of the block
2865 for a particular anchor is 248 + 1 + 4095 bytes, which is
2866 divisible by eight, ensuring natural spacing of anchors. */
2867 targetm
.min_anchor_offset
= -248;
2868 targetm
.max_anchor_offset
= 4095;
2872 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2873 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2878 /* If optimizing for size, bump the number of instructions that we
2879 are prepared to conditionally execute (even on a StrongARM). */
2880 max_insns_skipped
= 6;
2882 /* For THUMB2, we limit the conditional sequence to one IT block. */
2884 max_insns_skipped
= arm_restrict_it
? 1 : 4;
2887 /* When -mrestrict-it is in use tone down the if-conversion. */
2888 max_insns_skipped
= (TARGET_THUMB2
&& arm_restrict_it
)
2889 ? 1 : current_tune
->max_insns_skipped
;
2892 /* True if -mflip-thumb should next add an attribute for the default
2893 mode, false if it should next add an attribute for the opposite mode. */
2894 static GTY(()) bool thumb_flipper
;
2896 /* Options after initial target override. */
2897 static GTY(()) tree init_optimize
;
2900 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2902 if (opts
->x_align_functions
<= 0)
2903 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2904 && opts
->x_optimize_size
? 2 : 4;
2907 /* Implement targetm.override_options_after_change. */
2910 arm_override_options_after_change (void)
2912 arm_override_options_after_change_1 (&global_options
);
2915 /* Reset options between modes that the user has specified. */
2917 arm_option_override_internal (struct gcc_options
*opts
,
2918 struct gcc_options
*opts_set
)
2920 arm_override_options_after_change_1 (opts
);
2922 if (TARGET_INTERWORK
&& !ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
))
2924 /* The default is to enable interworking, so this warning message would
2925 be confusing to users who have just compiled with, eg, -march=armv3. */
2926 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2927 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2930 if (TARGET_THUMB_P (opts
->x_target_flags
)
2931 && !(ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
)))
2933 warning (0, "target CPU does not support THUMB instructions");
2934 opts
->x_target_flags
&= ~MASK_THUMB
;
2937 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2939 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2940 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2943 /* Callee super interworking implies thumb interworking. Adding
2944 this to the flags here simplifies the logic elsewhere. */
2945 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2946 opts
->x_target_flags
|= MASK_INTERWORK
;
2948 /* need to remember initial values so combinaisons of options like
2949 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2950 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2952 if (! opts_set
->x_arm_restrict_it
)
2953 opts
->x_arm_restrict_it
= arm_arch8
;
2955 /* ARM execution state and M profile don't have [restrict] IT. */
2956 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
2957 opts
->x_arm_restrict_it
= 0;
2959 /* Enable -munaligned-access by default for
2960 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2961 i.e. Thumb2 and ARM state only.
2962 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2963 - ARMv8 architecture-base processors.
2965 Disable -munaligned-access by default for
2966 - all pre-ARMv6 architecture-based processors
2967 - ARMv6-M architecture-based processors
2968 - ARMv8-M Baseline processors. */
2970 if (! opts_set
->x_unaligned_access
)
2972 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
2973 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
2975 else if (opts
->x_unaligned_access
== 1
2976 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2978 warning (0, "target CPU does not support unaligned accesses");
2979 opts
->x_unaligned_access
= 0;
2982 /* Don't warn since it's on by default in -O2. */
2983 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2984 opts
->x_flag_schedule_insns
= 0;
2986 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
2988 /* Disable shrink-wrap when optimizing function for size, since it tends to
2989 generate additional returns. */
2990 if (optimize_function_for_size_p (cfun
)
2991 && TARGET_THUMB2_P (opts
->x_target_flags
))
2992 opts
->x_flag_shrink_wrap
= false;
2994 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
2996 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2997 - epilogue_insns - does not accurately model the corresponding insns
2998 emitted in the asm file. In particular, see the comment in thumb_exit
2999 'Find out how many of the (return) argument registers we can corrupt'.
3000 As a consequence, the epilogue may clobber registers without fipa-ra
3001 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3002 TODO: Accurately model clobbers for epilogue_insns and reenable
3004 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3005 opts
->x_flag_ipa_ra
= 0;
3007 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3009 /* Thumb2 inline assembly code should always use unified syntax.
3010 This will apply to ARM and Thumb1 eventually. */
3011 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3013 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3014 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3018 /* Fix up any incompatible options that the user has specified. */
3020 arm_option_override (void)
3022 arm_selected_arch
= NULL
;
3023 arm_selected_cpu
= NULL
;
3024 arm_selected_tune
= NULL
;
3026 if (global_options_set
.x_arm_arch_option
)
3027 arm_selected_arch
= &all_architectures
[arm_arch_option
];
3029 if (global_options_set
.x_arm_cpu_option
)
3031 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
3032 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
3035 if (global_options_set
.x_arm_tune_option
)
3036 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
3038 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3039 SUBTARGET_OVERRIDE_OPTIONS
;
3042 if (arm_selected_arch
)
3044 if (arm_selected_cpu
)
3046 const arm_feature_set tuning_flags
= ARM_FSET_MAKE_CPU1 (FL_TUNE
);
3047 arm_feature_set selected_flags
;
3048 ARM_FSET_XOR (selected_flags
, arm_selected_cpu
->flags
,
3049 arm_selected_arch
->flags
);
3050 ARM_FSET_EXCLUDE (selected_flags
, selected_flags
, tuning_flags
);
3051 /* Check for conflict between mcpu and march. */
3052 if (!ARM_FSET_IS_EMPTY (selected_flags
))
3054 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3055 arm_selected_cpu
->name
, arm_selected_arch
->name
);
3056 /* -march wins for code generation.
3057 -mcpu wins for default tuning. */
3058 if (!arm_selected_tune
)
3059 arm_selected_tune
= arm_selected_cpu
;
3061 arm_selected_cpu
= arm_selected_arch
;
3065 arm_selected_arch
= NULL
;
3068 /* Pick a CPU based on the architecture. */
3069 arm_selected_cpu
= arm_selected_arch
;
3072 /* If the user did not specify a processor, choose one for them. */
3073 if (!arm_selected_cpu
)
3075 const struct processors
* sel
;
3076 arm_feature_set sought
= ARM_FSET_EMPTY
;;
3078 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
3079 if (!arm_selected_cpu
->name
)
3081 #ifdef SUBTARGET_CPU_DEFAULT
3082 /* Use the subtarget default CPU if none was specified by
3084 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
3086 /* Default to ARM6. */
3087 if (!arm_selected_cpu
->name
)
3088 arm_selected_cpu
= &all_cores
[arm6
];
3091 sel
= arm_selected_cpu
;
3092 insn_flags
= sel
->flags
;
3094 /* Now check to see if the user has specified some command line
3095 switch that require certain abilities from the cpu. */
3097 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3099 ARM_FSET_ADD_CPU1 (sought
, FL_THUMB
);
3100 ARM_FSET_ADD_CPU1 (sought
, FL_MODE32
);
3102 /* There are no ARM processors that support both APCS-26 and
3103 interworking. Therefore we force FL_MODE26 to be removed
3104 from insn_flags here (if it was set), so that the search
3105 below will always be able to find a compatible processor. */
3106 ARM_FSET_DEL_CPU1 (insn_flags
, FL_MODE26
);
3109 if (!ARM_FSET_IS_EMPTY (sought
)
3110 && !(ARM_FSET_CPU_SUBSET (sought
, insn_flags
)))
3112 /* Try to locate a CPU type that supports all of the abilities
3113 of the default CPU, plus the extra abilities requested by
3115 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3116 if (ARM_FSET_CPU_SUBSET (sought
, sel
->flags
))
3119 if (sel
->name
== NULL
)
3121 unsigned current_bit_count
= 0;
3122 const struct processors
* best_fit
= NULL
;
3124 /* Ideally we would like to issue an error message here
3125 saying that it was not possible to find a CPU compatible
3126 with the default CPU, but which also supports the command
3127 line options specified by the programmer, and so they
3128 ought to use the -mcpu=<name> command line option to
3129 override the default CPU type.
3131 If we cannot find a cpu that has both the
3132 characteristics of the default cpu and the given
3133 command line options we scan the array again looking
3134 for a best match. */
3135 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3137 arm_feature_set required
= ARM_FSET_EMPTY
;
3138 ARM_FSET_UNION (required
, sought
, insn_flags
);
3139 if (ARM_FSET_CPU_SUBSET (required
, sel
->flags
))
3142 arm_feature_set flags
;
3143 ARM_FSET_INTER (flags
, sel
->flags
, insn_flags
);
3144 count
= feature_count (&flags
);
3146 if (count
>= current_bit_count
)
3149 current_bit_count
= count
;
3153 gcc_assert (best_fit
);
3157 arm_selected_cpu
= sel
;
3161 gcc_assert (arm_selected_cpu
);
3162 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3163 if (!arm_selected_tune
)
3164 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
3166 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
3167 insn_flags
= arm_selected_cpu
->flags
;
3168 arm_base_arch
= arm_selected_cpu
->base_arch
;
3170 arm_tune
= arm_selected_tune
->core
;
3171 tune_flags
= arm_selected_tune
->flags
;
3172 current_tune
= arm_selected_tune
->tune
;
3174 /* TBD: Dwarf info for apcs frame is not handled yet. */
3175 if (TARGET_APCS_FRAME
)
3176 flag_shrink_wrap
= false;
3178 /* BPABI targets use linker tricks to allow interworking on cores
3179 without thumb support. */
3180 if (TARGET_INTERWORK
3181 && !(ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
) || TARGET_BPABI
))
3183 warning (0, "target CPU does not support interworking" );
3184 target_flags
&= ~MASK_INTERWORK
;
3187 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3189 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3190 target_flags
|= MASK_APCS_FRAME
;
3193 if (TARGET_POKE_FUNCTION_NAME
)
3194 target_flags
|= MASK_APCS_FRAME
;
3196 if (TARGET_APCS_REENT
&& flag_pic
)
3197 error ("-fpic and -mapcs-reent are incompatible");
3199 if (TARGET_APCS_REENT
)
3200 warning (0, "APCS reentrant code not supported. Ignored");
3202 if (TARGET_APCS_FLOAT
)
3203 warning (0, "passing floating point arguments in fp regs not yet supported");
3205 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3206 arm_arch3m
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH3M
);
3207 arm_arch4
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH4
);
3208 arm_arch4t
= arm_arch4
&& (ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
));
3209 arm_arch5
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH5
);
3210 arm_arch5e
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH5E
);
3211 arm_arch6
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6
);
3212 arm_arch6k
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6K
);
3213 arm_arch6kz
= arm_arch6k
&& ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6KZ
);
3214 arm_arch_notm
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_NOTM
);
3215 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3216 arm_arch7
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH7
);
3217 arm_arch7em
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH7EM
);
3218 arm_arch8
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH8
);
3219 arm_arch8_1
= ARM_FSET_HAS_CPU2 (insn_flags
, FL2_ARCH8_1
);
3220 arm_arch_thumb1
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
);
3221 arm_arch_thumb2
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB2
);
3222 arm_arch_xscale
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_XSCALE
);
3224 arm_ld_sched
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_LDSCHED
);
3225 arm_tune_strongarm
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_STRONG
);
3226 arm_tune_wbuf
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_WBUF
);
3227 arm_tune_xscale
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_XSCALE
);
3228 arm_arch_iwmmxt
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_IWMMXT
);
3229 arm_arch_iwmmxt2
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_IWMMXT2
);
3230 arm_arch_thumb_hwdiv
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB_DIV
);
3231 arm_arch_arm_hwdiv
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARM_DIV
);
3232 arm_arch_no_volatile_ce
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_NO_VOLATILE_CE
);
3233 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
3234 arm_arch_crc
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_CRC32
);
3235 arm_m_profile_small_mul
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_SMALLMUL
);
3237 /* V5 code we generate is completely interworking capable, so we turn off
3238 TARGET_INTERWORK here to avoid many tests later on. */
3240 /* XXX However, we must pass the right pre-processor defines to CPP
3241 or GLD can get confused. This is a hack. */
3242 if (TARGET_INTERWORK
)
3243 arm_cpp_interwork
= 1;
3246 target_flags
&= ~MASK_INTERWORK
;
3248 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3249 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3251 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3252 error ("iwmmxt abi requires an iwmmxt capable cpu");
3254 if (!global_options_set
.x_arm_fpu_index
)
3256 const char *target_fpu_name
;
3259 #ifdef FPUTYPE_DEFAULT
3260 target_fpu_name
= FPUTYPE_DEFAULT
;
3262 target_fpu_name
= "vfp";
3265 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
3270 /* If soft-float is specified then don't use FPU. */
3271 if (TARGET_SOFT_FLOAT
)
3272 arm_fpu_attr
= FPU_NONE
;
3273 else if (TARGET_VFP
)
3274 arm_fpu_attr
= FPU_VFP
;
3278 if (TARGET_AAPCS_BASED
)
3280 if (TARGET_CALLER_INTERWORKING
)
3281 error ("AAPCS does not support -mcaller-super-interworking");
3283 if (TARGET_CALLEE_INTERWORKING
)
3284 error ("AAPCS does not support -mcallee-super-interworking");
3287 /* __fp16 support currently assumes the core has ldrh. */
3288 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3289 sorry ("__fp16 and no ldrh");
3291 if (TARGET_AAPCS_BASED
)
3293 if (arm_abi
== ARM_ABI_IWMMXT
)
3294 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3295 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
3296 && TARGET_HARD_FLOAT
3298 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3300 arm_pcs_default
= ARM_PCS_AAPCS
;
3304 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
3305 sorry ("-mfloat-abi=hard and VFP");
3307 if (arm_abi
== ARM_ABI_APCS
)
3308 arm_pcs_default
= ARM_PCS_APCS
;
3310 arm_pcs_default
= ARM_PCS_ATPCS
;
3313 /* For arm2/3 there is no need to do any scheduling if we are doing
3314 software floating-point. */
3315 if (TARGET_SOFT_FLOAT
&& !ARM_FSET_HAS_CPU1 (tune_flags
, FL_MODE32
))
3316 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3318 /* Use the cp15 method if it is available. */
3319 if (target_thread_pointer
== TP_AUTO
)
3321 if (arm_arch6k
&& !TARGET_THUMB1
)
3322 target_thread_pointer
= TP_CP15
;
3324 target_thread_pointer
= TP_SOFT
;
3327 /* Override the default structure alignment for AAPCS ABI. */
3328 if (!global_options_set
.x_arm_structure_size_boundary
)
3330 if (TARGET_AAPCS_BASED
)
3331 arm_structure_size_boundary
= 8;
3335 if (arm_structure_size_boundary
!= 8
3336 && arm_structure_size_boundary
!= 32
3337 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3339 if (ARM_DOUBLEWORD_ALIGN
)
3341 "structure size boundary can only be set to 8, 32 or 64");
3343 warning (0, "structure size boundary can only be set to 8 or 32");
3344 arm_structure_size_boundary
3345 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3349 /* If stack checking is disabled, we can use r10 as the PIC register,
3350 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3351 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3353 if (TARGET_VXWORKS_RTP
)
3354 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3355 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3358 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3359 arm_pic_register
= 9;
3361 if (arm_pic_register_string
!= NULL
)
3363 int pic_register
= decode_reg_name (arm_pic_register_string
);
3366 warning (0, "-mpic-register= is useless without -fpic");
3368 /* Prevent the user from choosing an obviously stupid PIC register. */
3369 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3370 || pic_register
== HARD_FRAME_POINTER_REGNUM
3371 || pic_register
== STACK_POINTER_REGNUM
3372 || pic_register
>= PC_REGNUM
3373 || (TARGET_VXWORKS_RTP
3374 && (unsigned int) pic_register
!= arm_pic_register
))
3375 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3377 arm_pic_register
= pic_register
;
3380 if (TARGET_VXWORKS_RTP
3381 && !global_options_set
.x_arm_pic_data_is_text_relative
)
3382 arm_pic_data_is_text_relative
= 0;
3384 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3385 if (fix_cm3_ldrd
== 2)
3387 if (arm_selected_cpu
->core
== cortexm3
)
3393 /* Hot/Cold partitioning is not currently supported, since we can't
3394 handle literal pool placement in that case. */
3395 if (flag_reorder_blocks_and_partition
)
3397 inform (input_location
,
3398 "-freorder-blocks-and-partition not supported on this architecture");
3399 flag_reorder_blocks_and_partition
= 0;
3400 flag_reorder_blocks
= 1;
3404 /* Hoisting PIC address calculations more aggressively provides a small,
3405 but measurable, size reduction for PIC code. Therefore, we decrease
3406 the bar for unrestricted expression hoisting to the cost of PIC address
3407 calculation, which is 2 instructions. */
3408 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3409 global_options
.x_param_values
,
3410 global_options_set
.x_param_values
);
3412 /* ARM EABI defaults to strict volatile bitfields. */
3413 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3414 && abi_version_at_least(2))
3415 flag_strict_volatile_bitfields
= 1;
3417 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3418 have deemed it beneficial (signified by setting
3419 prefetch.num_slots to 1 or more). */
3420 if (flag_prefetch_loop_arrays
< 0
3423 && current_tune
->prefetch
.num_slots
> 0)
3424 flag_prefetch_loop_arrays
= 1;
3426 /* Set up parameters to be used in prefetching algorithm. Do not
3427 override the defaults unless we are tuning for a core we have
3428 researched values for. */
3429 if (current_tune
->prefetch
.num_slots
> 0)
3430 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3431 current_tune
->prefetch
.num_slots
,
3432 global_options
.x_param_values
,
3433 global_options_set
.x_param_values
);
3434 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3435 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3436 current_tune
->prefetch
.l1_cache_line_size
,
3437 global_options
.x_param_values
,
3438 global_options_set
.x_param_values
);
3439 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3440 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3441 current_tune
->prefetch
.l1_cache_size
,
3442 global_options
.x_param_values
,
3443 global_options_set
.x_param_values
);
3445 /* Use Neon to perform 64-bits operations rather than core
3447 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3448 if (use_neon_for_64bits
== 1)
3449 prefer_neon_for_64bits
= true;
3451 /* Use the alternative scheduling-pressure algorithm by default. */
3452 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3453 global_options
.x_param_values
,
3454 global_options_set
.x_param_values
);
3456 /* Look through ready list and all of queue for instructions
3457 relevant for L2 auto-prefetcher. */
3458 int param_sched_autopref_queue_depth
;
3460 switch (current_tune
->sched_autopref
)
3462 case tune_params::SCHED_AUTOPREF_OFF
:
3463 param_sched_autopref_queue_depth
= -1;
3466 case tune_params::SCHED_AUTOPREF_RANK
:
3467 param_sched_autopref_queue_depth
= 0;
3470 case tune_params::SCHED_AUTOPREF_FULL
:
3471 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3478 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3479 param_sched_autopref_queue_depth
,
3480 global_options
.x_param_values
,
3481 global_options_set
.x_param_values
);
3483 /* Currently, for slow flash data, we just disable literal pools. */
3484 if (target_slow_flash_data
)
3485 arm_disable_literal_pool
= true;
3487 /* Disable scheduling fusion by default if it's not armv7 processor
3488 or doesn't prefer ldrd/strd. */
3489 if (flag_schedule_fusion
== 2
3490 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3491 flag_schedule_fusion
= 0;
3493 /* Need to remember initial options before they are overriden. */
3494 init_optimize
= build_optimization_node (&global_options
);
3496 arm_option_override_internal (&global_options
, &global_options_set
);
3497 arm_option_check_internal (&global_options
);
3498 arm_option_params_internal ();
3500 /* Register global variables with the garbage collector. */
3501 arm_add_gc_roots ();
3503 /* Save the initial options in case the user does function specific
3504 options or #pragma target. */
3505 target_option_default_node
= target_option_current_node
3506 = build_target_option_node (&global_options
);
3508 /* Init initial mode for testing. */
3509 thumb_flipper
= TARGET_THUMB
;
3513 arm_add_gc_roots (void)
3515 gcc_obstack_init(&minipool_obstack
);
3516 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3519 /* A table of known ARM exception types.
3520 For use with the interrupt function attribute. */
3524 const char *const arg
;
3525 const unsigned long return_value
;
3529 static const isr_attribute_arg isr_attribute_args
[] =
3531 { "IRQ", ARM_FT_ISR
},
3532 { "irq", ARM_FT_ISR
},
3533 { "FIQ", ARM_FT_FIQ
},
3534 { "fiq", ARM_FT_FIQ
},
3535 { "ABORT", ARM_FT_ISR
},
3536 { "abort", ARM_FT_ISR
},
3537 { "ABORT", ARM_FT_ISR
},
3538 { "abort", ARM_FT_ISR
},
3539 { "UNDEF", ARM_FT_EXCEPTION
},
3540 { "undef", ARM_FT_EXCEPTION
},
3541 { "SWI", ARM_FT_EXCEPTION
},
3542 { "swi", ARM_FT_EXCEPTION
},
3543 { NULL
, ARM_FT_NORMAL
}
3546 /* Returns the (interrupt) function type of the current
3547 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3549 static unsigned long
3550 arm_isr_value (tree argument
)
3552 const isr_attribute_arg
* ptr
;
3556 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3558 /* No argument - default to IRQ. */
3559 if (argument
== NULL_TREE
)
3562 /* Get the value of the argument. */
3563 if (TREE_VALUE (argument
) == NULL_TREE
3564 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3565 return ARM_FT_UNKNOWN
;
3567 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3569 /* Check it against the list of known arguments. */
3570 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3571 if (streq (arg
, ptr
->arg
))
3572 return ptr
->return_value
;
3574 /* An unrecognized interrupt type. */
3575 return ARM_FT_UNKNOWN
;
3578 /* Computes the type of the current function. */
3580 static unsigned long
3581 arm_compute_func_type (void)
3583 unsigned long type
= ARM_FT_UNKNOWN
;
3587 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3589 /* Decide if the current function is volatile. Such functions
3590 never return, and many memory cycles can be saved by not storing
3591 register values that will never be needed again. This optimization
3592 was added to speed up context switching in a kernel application. */
3594 && (TREE_NOTHROW (current_function_decl
)
3595 || !(flag_unwind_tables
3597 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3598 && TREE_THIS_VOLATILE (current_function_decl
))
3599 type
|= ARM_FT_VOLATILE
;
3601 if (cfun
->static_chain_decl
!= NULL
)
3602 type
|= ARM_FT_NESTED
;
3604 attr
= DECL_ATTRIBUTES (current_function_decl
);
3606 a
= lookup_attribute ("naked", attr
);
3608 type
|= ARM_FT_NAKED
;
3610 a
= lookup_attribute ("isr", attr
);
3612 a
= lookup_attribute ("interrupt", attr
);
3615 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3617 type
|= arm_isr_value (TREE_VALUE (a
));
3622 /* Returns the type of the current function. */
3625 arm_current_func_type (void)
3627 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3628 cfun
->machine
->func_type
= arm_compute_func_type ();
3630 return cfun
->machine
->func_type
;
3634 arm_allocate_stack_slots_for_args (void)
3636 /* Naked functions should not allocate stack slots for arguments. */
3637 return !IS_NAKED (arm_current_func_type ());
3641 arm_warn_func_return (tree decl
)
3643 /* Naked functions are implemented entirely in assembly, including the
3644 return sequence, so suppress warnings about this. */
3645 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3649 /* Output assembler code for a block containing the constant parts
3650 of a trampoline, leaving space for the variable parts.
3652 On the ARM, (if r8 is the static chain regnum, and remembering that
3653 referencing pc adds an offset of 8) the trampoline looks like:
3656 .word static chain value
3657 .word function's address
3658 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3661 arm_asm_trampoline_template (FILE *f
)
3663 fprintf (f
, "\t.syntax unified\n");
3667 fprintf (f
, "\t.arm\n");
3668 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3669 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3671 else if (TARGET_THUMB2
)
3673 fprintf (f
, "\t.thumb\n");
3674 /* The Thumb-2 trampoline is similar to the arm implementation.
3675 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3676 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3677 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3678 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3682 ASM_OUTPUT_ALIGN (f
, 2);
3683 fprintf (f
, "\t.code\t16\n");
3684 fprintf (f
, ".Ltrampoline_start:\n");
3685 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3686 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3687 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3688 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3689 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3690 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3692 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3693 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3696 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3699 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3701 rtx fnaddr
, mem
, a_tramp
;
3703 emit_block_move (m_tramp
, assemble_trampoline_template (),
3704 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3706 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3707 emit_move_insn (mem
, chain_value
);
3709 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3710 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3711 emit_move_insn (mem
, fnaddr
);
3713 a_tramp
= XEXP (m_tramp
, 0);
3714 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3715 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3716 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3719 /* Thumb trampolines should be entered in thumb mode, so set
3720 the bottom bit of the address. */
3723 arm_trampoline_adjust_address (rtx addr
)
3726 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3727 NULL
, 0, OPTAB_LIB_WIDEN
);
3731 /* Return 1 if it is possible to return using a single instruction.
3732 If SIBLING is non-null, this is a test for a return before a sibling
3733 call. SIBLING is the call insn, so we can examine its register usage. */
3736 use_return_insn (int iscond
, rtx sibling
)
3739 unsigned int func_type
;
3740 unsigned long saved_int_regs
;
3741 unsigned HOST_WIDE_INT stack_adjust
;
3742 arm_stack_offsets
*offsets
;
3744 /* Never use a return instruction before reload has run. */
3745 if (!reload_completed
)
3748 func_type
= arm_current_func_type ();
3750 /* Naked, volatile and stack alignment functions need special
3752 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3755 /* So do interrupt functions that use the frame pointer and Thumb
3756 interrupt functions. */
3757 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3760 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3761 && !optimize_function_for_size_p (cfun
))
3764 offsets
= arm_get_frame_offsets ();
3765 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3767 /* As do variadic functions. */
3768 if (crtl
->args
.pretend_args_size
3769 || cfun
->machine
->uses_anonymous_args
3770 /* Or if the function calls __builtin_eh_return () */
3771 || crtl
->calls_eh_return
3772 /* Or if the function calls alloca */
3773 || cfun
->calls_alloca
3774 /* Or if there is a stack adjustment. However, if the stack pointer
3775 is saved on the stack, we can use a pre-incrementing stack load. */
3776 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3777 && stack_adjust
== 4))
3778 /* Or if the static chain register was saved above the frame, under the
3779 assumption that the stack pointer isn't saved on the stack. */
3780 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
3781 && arm_compute_static_chain_stack_bytes() != 0))
3784 saved_int_regs
= offsets
->saved_regs_mask
;
3786 /* Unfortunately, the insn
3788 ldmib sp, {..., sp, ...}
3790 triggers a bug on most SA-110 based devices, such that the stack
3791 pointer won't be correctly restored if the instruction takes a
3792 page fault. We work around this problem by popping r3 along with
3793 the other registers, since that is never slower than executing
3794 another instruction.
3796 We test for !arm_arch5 here, because code for any architecture
3797 less than this could potentially be run on one of the buggy
3799 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3801 /* Validate that r3 is a call-clobbered register (always true in
3802 the default abi) ... */
3803 if (!call_used_regs
[3])
3806 /* ... that it isn't being used for a return value ... */
3807 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3810 /* ... or for a tail-call argument ... */
3813 gcc_assert (CALL_P (sibling
));
3815 if (find_regno_fusage (sibling
, USE
, 3))
3819 /* ... and that there are no call-saved registers in r0-r2
3820 (always true in the default ABI). */
3821 if (saved_int_regs
& 0x7)
3825 /* Can't be done if interworking with Thumb, and any registers have been
3827 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3830 /* On StrongARM, conditional returns are expensive if they aren't
3831 taken and multiple registers have been stacked. */
3832 if (iscond
&& arm_tune_strongarm
)
3834 /* Conditional return when just the LR is stored is a simple
3835 conditional-load instruction, that's not expensive. */
3836 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3840 && arm_pic_register
!= INVALID_REGNUM
3841 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3845 /* If there are saved registers but the LR isn't saved, then we need
3846 two instructions for the return. */
3847 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3850 /* Can't be done if any of the VFP regs are pushed,
3851 since this also requires an insn. */
3852 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3853 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3854 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
3857 if (TARGET_REALLY_IWMMXT
)
3858 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3859 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3865 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3866 shrink-wrapping if possible. This is the case if we need to emit a
3867 prologue, which we can test by looking at the offsets. */
3869 use_simple_return_p (void)
3871 arm_stack_offsets
*offsets
;
3873 offsets
= arm_get_frame_offsets ();
3874 return offsets
->outgoing_args
!= 0;
3877 /* Return TRUE if int I is a valid immediate ARM constant. */
3880 const_ok_for_arm (HOST_WIDE_INT i
)
3884 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3885 be all zero, or all one. */
3886 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3887 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3888 != ((~(unsigned HOST_WIDE_INT
) 0)
3889 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3892 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3894 /* Fast return for 0 and small values. We must do this for zero, since
3895 the code below can't handle that one case. */
3896 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3899 /* Get the number of trailing zeros. */
3900 lowbit
= ffs((int) i
) - 1;
3902 /* Only even shifts are allowed in ARM mode so round down to the
3903 nearest even number. */
3907 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3912 /* Allow rotated constants in ARM mode. */
3914 && ((i
& ~0xc000003f) == 0
3915 || (i
& ~0xf000000f) == 0
3916 || (i
& ~0xfc000003) == 0))
3923 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3926 if (i
== v
|| i
== (v
| (v
<< 8)))
3929 /* Allow repeated pattern 0xXY00XY00. */
3939 /* Return true if I is a valid constant for the operation CODE. */
3941 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3943 if (const_ok_for_arm (i
))
3949 /* See if we can use movw. */
3950 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
3953 /* Otherwise, try mvn. */
3954 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3957 /* See if we can use addw or subw. */
3959 && ((i
& 0xfffff000) == 0
3960 || ((-i
) & 0xfffff000) == 0))
3962 /* else fall through. */
3982 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3984 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3990 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3994 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4001 /* Return true if I is a valid di mode constant for the operation CODE. */
4003 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4005 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4006 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4007 rtx hi
= GEN_INT (hi_val
);
4008 rtx lo
= GEN_INT (lo_val
);
4018 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4019 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4021 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4028 /* Emit a sequence of insns to handle a large constant.
4029 CODE is the code of the operation required, it can be any of SET, PLUS,
4030 IOR, AND, XOR, MINUS;
4031 MODE is the mode in which the operation is being performed;
4032 VAL is the integer to operate on;
4033 SOURCE is the other operand (a register, or a null-pointer for SET);
4034 SUBTARGETS means it is safe to create scratch registers if that will
4035 either produce a simpler sequence, or we will want to cse the values.
4036 Return value is the number of insns emitted. */
4038 /* ??? Tweak this for thumb2. */
4040 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4041 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4045 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4046 cond
= COND_EXEC_TEST (PATTERN (insn
));
4050 if (subtargets
|| code
== SET
4051 || (REG_P (target
) && REG_P (source
)
4052 && REGNO (target
) != REGNO (source
)))
4054 /* After arm_reorg has been called, we can't fix up expensive
4055 constants by pushing them into memory so we must synthesize
4056 them in-line, regardless of the cost. This is only likely to
4057 be more costly on chips that have load delay slots and we are
4058 compiling without running the scheduler (so no splitting
4059 occurred before the final instruction emission).
4061 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4063 if (!cfun
->machine
->after_arm_reorg
4065 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4067 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4072 /* Currently SET is the only monadic value for CODE, all
4073 the rest are diadic. */
4074 if (TARGET_USE_MOVT
)
4075 arm_emit_movpair (target
, GEN_INT (val
));
4077 emit_set_insn (target
, GEN_INT (val
));
4083 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4085 if (TARGET_USE_MOVT
)
4086 arm_emit_movpair (temp
, GEN_INT (val
));
4088 emit_set_insn (temp
, GEN_INT (val
));
4090 /* For MINUS, the value is subtracted from, since we never
4091 have subtraction of a constant. */
4093 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4095 emit_set_insn (target
,
4096 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4102 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4106 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4107 ARM/THUMB2 immediates, and add up to VAL.
4108 Thr function return value gives the number of insns required. */
4110 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4111 struct four_ints
*return_sequence
)
4113 int best_consecutive_zeros
= 0;
4117 struct four_ints tmp_sequence
;
4119 /* If we aren't targeting ARM, the best place to start is always at
4120 the bottom, otherwise look more closely. */
4123 for (i
= 0; i
< 32; i
+= 2)
4125 int consecutive_zeros
= 0;
4127 if (!(val
& (3 << i
)))
4129 while ((i
< 32) && !(val
& (3 << i
)))
4131 consecutive_zeros
+= 2;
4134 if (consecutive_zeros
> best_consecutive_zeros
)
4136 best_consecutive_zeros
= consecutive_zeros
;
4137 best_start
= i
- consecutive_zeros
;
4144 /* So long as it won't require any more insns to do so, it's
4145 desirable to emit a small constant (in bits 0...9) in the last
4146 insn. This way there is more chance that it can be combined with
4147 a later addressing insn to form a pre-indexed load or store
4148 operation. Consider:
4150 *((volatile int *)0xe0000100) = 1;
4151 *((volatile int *)0xe0000110) = 2;
4153 We want this to wind up as:
4157 str rB, [rA, #0x100]
4159 str rB, [rA, #0x110]
4161 rather than having to synthesize both large constants from scratch.
4163 Therefore, we calculate how many insns would be required to emit
4164 the constant starting from `best_start', and also starting from
4165 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4166 yield a shorter sequence, we may as well use zero. */
4167 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4169 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4171 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4172 if (insns2
<= insns1
)
4174 *return_sequence
= tmp_sequence
;
4182 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4184 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4185 struct four_ints
*return_sequence
, int i
)
4187 int remainder
= val
& 0xffffffff;
4190 /* Try and find a way of doing the job in either two or three
4193 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4194 location. We start at position I. This may be the MSB, or
4195 optimial_immediate_sequence may have positioned it at the largest block
4196 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4197 wrapping around to the top of the word when we drop off the bottom.
4198 In the worst case this code should produce no more than four insns.
4200 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4201 constants, shifted to any arbitrary location. We should always start
4206 unsigned int b1
, b2
, b3
, b4
;
4207 unsigned HOST_WIDE_INT result
;
4210 gcc_assert (insns
< 4);
4215 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4216 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4219 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4220 /* We can use addw/subw for the last 12 bits. */
4224 /* Use an 8-bit shifted/rotated immediate. */
4228 result
= remainder
& ((0x0ff << end
)
4229 | ((i
< end
) ? (0xff >> (32 - end
))
4236 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4237 arbitrary shifts. */
4238 i
-= TARGET_ARM
? 2 : 1;
4242 /* Next, see if we can do a better job with a thumb2 replicated
4245 We do it this way around to catch the cases like 0x01F001E0 where
4246 two 8-bit immediates would work, but a replicated constant would
4249 TODO: 16-bit constants that don't clear all the bits, but still win.
4250 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4253 b1
= (remainder
& 0xff000000) >> 24;
4254 b2
= (remainder
& 0x00ff0000) >> 16;
4255 b3
= (remainder
& 0x0000ff00) >> 8;
4256 b4
= remainder
& 0xff;
4260 /* The 8-bit immediate already found clears b1 (and maybe b2),
4261 but must leave b3 and b4 alone. */
4263 /* First try to find a 32-bit replicated constant that clears
4264 almost everything. We can assume that we can't do it in one,
4265 or else we wouldn't be here. */
4266 unsigned int tmp
= b1
& b2
& b3
& b4
;
4267 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4269 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4270 + (tmp
== b3
) + (tmp
== b4
);
4272 && (matching_bytes
>= 3
4273 || (matching_bytes
== 2
4274 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4276 /* At least 3 of the bytes match, and the fourth has at
4277 least as many bits set, or two of the bytes match
4278 and it will only require one more insn to finish. */
4286 /* Second, try to find a 16-bit replicated constant that can
4287 leave three of the bytes clear. If b2 or b4 is already
4288 zero, then we can. If the 8-bit from above would not
4289 clear b2 anyway, then we still win. */
4290 else if (b1
== b3
&& (!b2
|| !b4
4291 || (remainder
& 0x00ff0000 & ~result
)))
4293 result
= remainder
& 0xff00ff00;
4299 /* The 8-bit immediate already found clears b2 (and maybe b3)
4300 and we don't get here unless b1 is alredy clear, but it will
4301 leave b4 unchanged. */
4303 /* If we can clear b2 and b4 at once, then we win, since the
4304 8-bits couldn't possibly reach that far. */
4307 result
= remainder
& 0x00ff00ff;
4313 return_sequence
->i
[insns
++] = result
;
4314 remainder
&= ~result
;
4316 if (code
== SET
|| code
== MINUS
)
4324 /* Emit an instruction with the indicated PATTERN. If COND is
4325 non-NULL, conditionalize the execution of the instruction on COND
4329 emit_constant_insn (rtx cond
, rtx pattern
)
4332 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4333 emit_insn (pattern
);
4336 /* As above, but extra parameter GENERATE which, if clear, suppresses
4340 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4341 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4342 int subtargets
, int generate
)
4346 int final_invert
= 0;
4348 int set_sign_bit_copies
= 0;
4349 int clear_sign_bit_copies
= 0;
4350 int clear_zero_bit_copies
= 0;
4351 int set_zero_bit_copies
= 0;
4352 int insns
= 0, neg_insns
, inv_insns
;
4353 unsigned HOST_WIDE_INT temp1
, temp2
;
4354 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4355 struct four_ints
*immediates
;
4356 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4358 /* Find out which operations are safe for a given CODE. Also do a quick
4359 check for degenerate cases; these can occur when DImode operations
4372 if (remainder
== 0xffffffff)
4375 emit_constant_insn (cond
,
4376 gen_rtx_SET (target
,
4377 GEN_INT (ARM_SIGN_EXTEND (val
))));
4383 if (reload_completed
&& rtx_equal_p (target
, source
))
4387 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4396 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4399 if (remainder
== 0xffffffff)
4401 if (reload_completed
&& rtx_equal_p (target
, source
))
4404 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4413 if (reload_completed
&& rtx_equal_p (target
, source
))
4416 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4420 if (remainder
== 0xffffffff)
4423 emit_constant_insn (cond
,
4424 gen_rtx_SET (target
,
4425 gen_rtx_NOT (mode
, source
)));
4432 /* We treat MINUS as (val - source), since (source - val) is always
4433 passed as (source + (-val)). */
4437 emit_constant_insn (cond
,
4438 gen_rtx_SET (target
,
4439 gen_rtx_NEG (mode
, source
)));
4442 if (const_ok_for_arm (val
))
4445 emit_constant_insn (cond
,
4446 gen_rtx_SET (target
,
4447 gen_rtx_MINUS (mode
, GEN_INT (val
),
4458 /* If we can do it in one insn get out quickly. */
4459 if (const_ok_for_op (val
, code
))
4462 emit_constant_insn (cond
,
4463 gen_rtx_SET (target
,
4465 ? gen_rtx_fmt_ee (code
, mode
, source
,
4471 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4473 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4474 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4478 if (mode
== SImode
&& i
== 16)
4479 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4481 emit_constant_insn (cond
,
4482 gen_zero_extendhisi2
4483 (target
, gen_lowpart (HImode
, source
)));
4485 /* Extz only supports SImode, but we can coerce the operands
4487 emit_constant_insn (cond
,
4488 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4489 gen_lowpart (SImode
, source
),
4490 GEN_INT (i
), const0_rtx
));
4496 /* Calculate a few attributes that may be useful for specific
4498 /* Count number of leading zeros. */
4499 for (i
= 31; i
>= 0; i
--)
4501 if ((remainder
& (1 << i
)) == 0)
4502 clear_sign_bit_copies
++;
4507 /* Count number of leading 1's. */
4508 for (i
= 31; i
>= 0; i
--)
4510 if ((remainder
& (1 << i
)) != 0)
4511 set_sign_bit_copies
++;
4516 /* Count number of trailing zero's. */
4517 for (i
= 0; i
<= 31; i
++)
4519 if ((remainder
& (1 << i
)) == 0)
4520 clear_zero_bit_copies
++;
4525 /* Count number of trailing 1's. */
4526 for (i
= 0; i
<= 31; i
++)
4528 if ((remainder
& (1 << i
)) != 0)
4529 set_zero_bit_copies
++;
4537 /* See if we can do this by sign_extending a constant that is known
4538 to be negative. This is a good, way of doing it, since the shift
4539 may well merge into a subsequent insn. */
4540 if (set_sign_bit_copies
> 1)
4542 if (const_ok_for_arm
4543 (temp1
= ARM_SIGN_EXTEND (remainder
4544 << (set_sign_bit_copies
- 1))))
4548 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4549 emit_constant_insn (cond
,
4550 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4551 emit_constant_insn (cond
,
4552 gen_ashrsi3 (target
, new_src
,
4553 GEN_INT (set_sign_bit_copies
- 1)));
4557 /* For an inverted constant, we will need to set the low bits,
4558 these will be shifted out of harm's way. */
4559 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4560 if (const_ok_for_arm (~temp1
))
4564 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4565 emit_constant_insn (cond
,
4566 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4567 emit_constant_insn (cond
,
4568 gen_ashrsi3 (target
, new_src
,
4569 GEN_INT (set_sign_bit_copies
- 1)));
4575 /* See if we can calculate the value as the difference between two
4576 valid immediates. */
4577 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4579 int topshift
= clear_sign_bit_copies
& ~1;
4581 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4582 & (0xff000000 >> topshift
));
4584 /* If temp1 is zero, then that means the 9 most significant
4585 bits of remainder were 1 and we've caused it to overflow.
4586 When topshift is 0 we don't need to do anything since we
4587 can borrow from 'bit 32'. */
4588 if (temp1
== 0 && topshift
!= 0)
4589 temp1
= 0x80000000 >> (topshift
- 1);
4591 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4593 if (const_ok_for_arm (temp2
))
4597 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4598 emit_constant_insn (cond
,
4599 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4600 emit_constant_insn (cond
,
4601 gen_addsi3 (target
, new_src
,
4609 /* See if we can generate this by setting the bottom (or the top)
4610 16 bits, and then shifting these into the other half of the
4611 word. We only look for the simplest cases, to do more would cost
4612 too much. Be careful, however, not to generate this when the
4613 alternative would take fewer insns. */
4614 if (val
& 0xffff0000)
4616 temp1
= remainder
& 0xffff0000;
4617 temp2
= remainder
& 0x0000ffff;
4619 /* Overlaps outside this range are best done using other methods. */
4620 for (i
= 9; i
< 24; i
++)
4622 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4623 && !const_ok_for_arm (temp2
))
4625 rtx new_src
= (subtargets
4626 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4628 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4629 source
, subtargets
, generate
);
4637 gen_rtx_ASHIFT (mode
, source
,
4644 /* Don't duplicate cases already considered. */
4645 for (i
= 17; i
< 24; i
++)
4647 if (((temp1
| (temp1
>> i
)) == remainder
)
4648 && !const_ok_for_arm (temp1
))
4650 rtx new_src
= (subtargets
4651 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4653 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4654 source
, subtargets
, generate
);
4659 gen_rtx_SET (target
,
4662 gen_rtx_LSHIFTRT (mode
, source
,
4673 /* If we have IOR or XOR, and the constant can be loaded in a
4674 single instruction, and we can find a temporary to put it in,
4675 then this can be done in two instructions instead of 3-4. */
4677 /* TARGET can't be NULL if SUBTARGETS is 0 */
4678 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4680 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4684 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4686 emit_constant_insn (cond
,
4687 gen_rtx_SET (sub
, GEN_INT (val
)));
4688 emit_constant_insn (cond
,
4689 gen_rtx_SET (target
,
4690 gen_rtx_fmt_ee (code
, mode
,
4701 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4702 and the remainder 0s for e.g. 0xfff00000)
4703 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4705 This can be done in 2 instructions by using shifts with mov or mvn.
4710 mvn r0, r0, lsr #12 */
4711 if (set_sign_bit_copies
> 8
4712 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4716 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4717 rtx shift
= GEN_INT (set_sign_bit_copies
);
4723 gen_rtx_ASHIFT (mode
,
4728 gen_rtx_SET (target
,
4730 gen_rtx_LSHIFTRT (mode
, sub
,
4737 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4739 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4741 For eg. r0 = r0 | 0xfff
4746 if (set_zero_bit_copies
> 8
4747 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4751 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4752 rtx shift
= GEN_INT (set_zero_bit_copies
);
4758 gen_rtx_LSHIFTRT (mode
,
4763 gen_rtx_SET (target
,
4765 gen_rtx_ASHIFT (mode
, sub
,
4771 /* This will never be reached for Thumb2 because orn is a valid
4772 instruction. This is for Thumb1 and the ARM 32 bit cases.
4774 x = y | constant (such that ~constant is a valid constant)
4776 x = ~(~y & ~constant).
4778 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4782 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4783 emit_constant_insn (cond
,
4785 gen_rtx_NOT (mode
, source
)));
4788 sub
= gen_reg_rtx (mode
);
4789 emit_constant_insn (cond
,
4791 gen_rtx_AND (mode
, source
,
4793 emit_constant_insn (cond
,
4794 gen_rtx_SET (target
,
4795 gen_rtx_NOT (mode
, sub
)));
4802 /* See if two shifts will do 2 or more insn's worth of work. */
4803 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4805 HOST_WIDE_INT shift_mask
= ((0xffffffff
4806 << (32 - clear_sign_bit_copies
))
4809 if ((remainder
| shift_mask
) != 0xffffffff)
4811 HOST_WIDE_INT new_val
4812 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4816 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4817 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4818 new_src
, source
, subtargets
, 1);
4823 rtx targ
= subtargets
? NULL_RTX
: target
;
4824 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4825 targ
, source
, subtargets
, 0);
4831 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4832 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4834 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4835 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4841 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4843 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4845 if ((remainder
| shift_mask
) != 0xffffffff)
4847 HOST_WIDE_INT new_val
4848 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4851 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4853 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4854 new_src
, source
, subtargets
, 1);
4859 rtx targ
= subtargets
? NULL_RTX
: target
;
4861 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4862 targ
, source
, subtargets
, 0);
4868 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4869 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4871 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4872 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4884 /* Calculate what the instruction sequences would be if we generated it
4885 normally, negated, or inverted. */
4887 /* AND cannot be split into multiple insns, so invert and use BIC. */
4890 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4893 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4898 if (can_invert
|| final_invert
)
4899 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4904 immediates
= &pos_immediates
;
4906 /* Is the negated immediate sequence more efficient? */
4907 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4910 immediates
= &neg_immediates
;
4915 /* Is the inverted immediate sequence more efficient?
4916 We must allow for an extra NOT instruction for XOR operations, although
4917 there is some chance that the final 'mvn' will get optimized later. */
4918 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4921 immediates
= &inv_immediates
;
4929 /* Now output the chosen sequence as instructions. */
4932 for (i
= 0; i
< insns
; i
++)
4934 rtx new_src
, temp1_rtx
;
4936 temp1
= immediates
->i
[i
];
4938 if (code
== SET
|| code
== MINUS
)
4939 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4940 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4941 new_src
= gen_reg_rtx (mode
);
4947 else if (can_negate
)
4950 temp1
= trunc_int_for_mode (temp1
, mode
);
4951 temp1_rtx
= GEN_INT (temp1
);
4955 else if (code
== MINUS
)
4956 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4958 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4960 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
4965 can_negate
= can_invert
;
4969 else if (code
== MINUS
)
4977 emit_constant_insn (cond
, gen_rtx_SET (target
,
4978 gen_rtx_NOT (mode
, source
)));
4985 /* Canonicalize a comparison so that we are more likely to recognize it.
4986 This can be done for a few constant compares, where we can make the
4987 immediate value easier to load. */
4990 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4991 bool op0_preserve_value
)
4994 unsigned HOST_WIDE_INT i
, maxval
;
4996 mode
= GET_MODE (*op0
);
4997 if (mode
== VOIDmode
)
4998 mode
= GET_MODE (*op1
);
5000 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5002 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5003 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5004 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5005 for GTU/LEU in Thumb mode. */
5009 if (*code
== GT
|| *code
== LE
5010 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5012 /* Missing comparison. First try to use an available
5014 if (CONST_INT_P (*op1
))
5022 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5024 *op1
= GEN_INT (i
+ 1);
5025 *code
= *code
== GT
? GE
: LT
;
5031 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5032 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5034 *op1
= GEN_INT (i
+ 1);
5035 *code
= *code
== GTU
? GEU
: LTU
;
5044 /* If that did not work, reverse the condition. */
5045 if (!op0_preserve_value
)
5047 std::swap (*op0
, *op1
);
5048 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5054 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5055 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5056 to facilitate possible combining with a cmp into 'ands'. */
5058 && GET_CODE (*op0
) == ZERO_EXTEND
5059 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5060 && GET_MODE (XEXP (*op0
, 0)) == QImode
5061 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5062 && subreg_lowpart_p (XEXP (*op0
, 0))
5063 && *op1
== const0_rtx
)
5064 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5067 /* Comparisons smaller than DImode. Only adjust comparisons against
5068 an out-of-range constant. */
5069 if (!CONST_INT_P (*op1
)
5070 || const_ok_for_arm (INTVAL (*op1
))
5071 || const_ok_for_arm (- INTVAL (*op1
)))
5085 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5087 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5088 *code
= *code
== GT
? GE
: LT
;
5096 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5098 *op1
= GEN_INT (i
- 1);
5099 *code
= *code
== GE
? GT
: LE
;
5106 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5107 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5109 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5110 *code
= *code
== GTU
? GEU
: LTU
;
5118 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5120 *op1
= GEN_INT (i
- 1);
5121 *code
= *code
== GEU
? GTU
: LEU
;
5132 /* Define how to find the value returned by a function. */
5135 arm_function_value(const_tree type
, const_tree func
,
5136 bool outgoing ATTRIBUTE_UNUSED
)
5139 int unsignedp ATTRIBUTE_UNUSED
;
5140 rtx r ATTRIBUTE_UNUSED
;
5142 mode
= TYPE_MODE (type
);
5144 if (TARGET_AAPCS_BASED
)
5145 return aapcs_allocate_return_reg (mode
, type
, func
);
5147 /* Promote integer types. */
5148 if (INTEGRAL_TYPE_P (type
))
5149 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5151 /* Promotes small structs returned in a register to full-word size
5152 for big-endian AAPCS. */
5153 if (arm_return_in_msb (type
))
5155 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5156 if (size
% UNITS_PER_WORD
!= 0)
5158 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5159 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5163 return arm_libcall_value_1 (mode
);
5166 /* libcall hashtable helpers. */
5168 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5170 static inline hashval_t
hash (const rtx_def
*);
5171 static inline bool equal (const rtx_def
*, const rtx_def
*);
5172 static inline void remove (rtx_def
*);
5176 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5178 return rtx_equal_p (p1
, p2
);
5182 libcall_hasher::hash (const rtx_def
*p1
)
5184 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5187 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5190 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5192 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5196 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5198 static bool init_done
= false;
5199 static libcall_table_type
*libcall_htab
= NULL
;
5205 libcall_htab
= new libcall_table_type (31);
5206 add_libcall (libcall_htab
,
5207 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5208 add_libcall (libcall_htab
,
5209 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5210 add_libcall (libcall_htab
,
5211 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5212 add_libcall (libcall_htab
,
5213 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5215 add_libcall (libcall_htab
,
5216 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5217 add_libcall (libcall_htab
,
5218 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5219 add_libcall (libcall_htab
,
5220 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5221 add_libcall (libcall_htab
,
5222 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5224 add_libcall (libcall_htab
,
5225 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5226 add_libcall (libcall_htab
,
5227 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5228 add_libcall (libcall_htab
,
5229 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5230 add_libcall (libcall_htab
,
5231 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5232 add_libcall (libcall_htab
,
5233 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5234 add_libcall (libcall_htab
,
5235 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5236 add_libcall (libcall_htab
,
5237 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5238 add_libcall (libcall_htab
,
5239 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5241 /* Values from double-precision helper functions are returned in core
5242 registers if the selected core only supports single-precision
5243 arithmetic, even if we are using the hard-float ABI. The same is
5244 true for single-precision helpers, but we will never be using the
5245 hard-float ABI on a CPU which doesn't support single-precision
5246 operations in hardware. */
5247 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5248 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5249 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5250 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5251 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5252 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5253 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5254 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5255 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5256 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5257 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5258 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5260 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5264 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5268 arm_libcall_value_1 (machine_mode mode
)
5270 if (TARGET_AAPCS_BASED
)
5271 return aapcs_libcall_value (mode
);
5272 else if (TARGET_IWMMXT_ABI
5273 && arm_vector_mode_supported_p (mode
))
5274 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5276 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5279 /* Define how to find the value returned by a library function
5280 assuming the value has mode MODE. */
5283 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5285 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5286 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5288 /* The following libcalls return their result in integer registers,
5289 even though they return a floating point value. */
5290 if (arm_libcall_uses_aapcs_base (libcall
))
5291 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5295 return arm_libcall_value_1 (mode
);
5298 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5301 arm_function_value_regno_p (const unsigned int regno
)
5303 if (regno
== ARG_REGISTER (1)
5305 && TARGET_AAPCS_BASED
5307 && TARGET_HARD_FLOAT
5308 && regno
== FIRST_VFP_REGNUM
)
5309 || (TARGET_IWMMXT_ABI
5310 && regno
== FIRST_IWMMXT_REGNUM
))
5316 /* Determine the amount of memory needed to store the possible return
5317 registers of an untyped call. */
5319 arm_apply_result_size (void)
5325 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
5327 if (TARGET_IWMMXT_ABI
)
5334 /* Decide whether TYPE should be returned in memory (true)
5335 or in a register (false). FNTYPE is the type of the function making
5338 arm_return_in_memory (const_tree type
, const_tree fntype
)
5342 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5344 if (TARGET_AAPCS_BASED
)
5346 /* Simple, non-aggregate types (ie not including vectors and
5347 complex) are always returned in a register (or registers).
5348 We don't care about which register here, so we can short-cut
5349 some of the detail. */
5350 if (!AGGREGATE_TYPE_P (type
)
5351 && TREE_CODE (type
) != VECTOR_TYPE
5352 && TREE_CODE (type
) != COMPLEX_TYPE
)
5355 /* Any return value that is no larger than one word can be
5357 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5360 /* Check any available co-processors to see if they accept the
5361 type as a register candidate (VFP, for example, can return
5362 some aggregates in consecutive registers). These aren't
5363 available if the call is variadic. */
5364 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5367 /* Vector values should be returned using ARM registers, not
5368 memory (unless they're over 16 bytes, which will break since
5369 we only have four call-clobbered registers to play with). */
5370 if (TREE_CODE (type
) == VECTOR_TYPE
)
5371 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5373 /* The rest go in memory. */
5377 if (TREE_CODE (type
) == VECTOR_TYPE
)
5378 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5380 if (!AGGREGATE_TYPE_P (type
) &&
5381 (TREE_CODE (type
) != VECTOR_TYPE
))
5382 /* All simple types are returned in registers. */
5385 if (arm_abi
!= ARM_ABI_APCS
)
5387 /* ATPCS and later return aggregate types in memory only if they are
5388 larger than a word (or are variable size). */
5389 return (size
< 0 || size
> UNITS_PER_WORD
);
5392 /* For the arm-wince targets we choose to be compatible with Microsoft's
5393 ARM and Thumb compilers, which always return aggregates in memory. */
5395 /* All structures/unions bigger than one word are returned in memory.
5396 Also catch the case where int_size_in_bytes returns -1. In this case
5397 the aggregate is either huge or of variable size, and in either case
5398 we will want to return it via memory and not in a register. */
5399 if (size
< 0 || size
> UNITS_PER_WORD
)
5402 if (TREE_CODE (type
) == RECORD_TYPE
)
5406 /* For a struct the APCS says that we only return in a register
5407 if the type is 'integer like' and every addressable element
5408 has an offset of zero. For practical purposes this means
5409 that the structure can have at most one non bit-field element
5410 and that this element must be the first one in the structure. */
5412 /* Find the first field, ignoring non FIELD_DECL things which will
5413 have been created by C++. */
5414 for (field
= TYPE_FIELDS (type
);
5415 field
&& TREE_CODE (field
) != FIELD_DECL
;
5416 field
= DECL_CHAIN (field
))
5420 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5422 /* Check that the first field is valid for returning in a register. */
5424 /* ... Floats are not allowed */
5425 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5428 /* ... Aggregates that are not themselves valid for returning in
5429 a register are not allowed. */
5430 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5433 /* Now check the remaining fields, if any. Only bitfields are allowed,
5434 since they are not addressable. */
5435 for (field
= DECL_CHAIN (field
);
5437 field
= DECL_CHAIN (field
))
5439 if (TREE_CODE (field
) != FIELD_DECL
)
5442 if (!DECL_BIT_FIELD_TYPE (field
))
5449 if (TREE_CODE (type
) == UNION_TYPE
)
5453 /* Unions can be returned in registers if every element is
5454 integral, or can be returned in an integer register. */
5455 for (field
= TYPE_FIELDS (type
);
5457 field
= DECL_CHAIN (field
))
5459 if (TREE_CODE (field
) != FIELD_DECL
)
5462 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5465 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5471 #endif /* not ARM_WINCE */
5473 /* Return all other types in memory. */
5477 const struct pcs_attribute_arg
5481 } pcs_attribute_args
[] =
5483 {"aapcs", ARM_PCS_AAPCS
},
5484 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5486 /* We could recognize these, but changes would be needed elsewhere
5487 * to implement them. */
5488 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5489 {"atpcs", ARM_PCS_ATPCS
},
5490 {"apcs", ARM_PCS_APCS
},
5492 {NULL
, ARM_PCS_UNKNOWN
}
5496 arm_pcs_from_attribute (tree attr
)
5498 const struct pcs_attribute_arg
*ptr
;
5501 /* Get the value of the argument. */
5502 if (TREE_VALUE (attr
) == NULL_TREE
5503 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5504 return ARM_PCS_UNKNOWN
;
5506 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5508 /* Check it against the list of known arguments. */
5509 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5510 if (streq (arg
, ptr
->arg
))
5513 /* An unrecognized interrupt type. */
5514 return ARM_PCS_UNKNOWN
;
5517 /* Get the PCS variant to use for this call. TYPE is the function's type
5518 specification, DECL is the specific declartion. DECL may be null if
5519 the call could be indirect or if this is a library call. */
5521 arm_get_pcs_model (const_tree type
, const_tree decl
)
5523 bool user_convention
= false;
5524 enum arm_pcs user_pcs
= arm_pcs_default
;
5529 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5532 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5533 user_convention
= true;
5536 if (TARGET_AAPCS_BASED
)
5538 /* Detect varargs functions. These always use the base rules
5539 (no argument is ever a candidate for a co-processor
5541 bool base_rules
= stdarg_p (type
);
5543 if (user_convention
)
5545 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5546 sorry ("non-AAPCS derived PCS variant");
5547 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5548 error ("variadic functions must use the base AAPCS variant");
5552 return ARM_PCS_AAPCS
;
5553 else if (user_convention
)
5555 else if (decl
&& flag_unit_at_a_time
)
5557 /* Local functions never leak outside this compilation unit,
5558 so we are free to use whatever conventions are
5560 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5561 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5563 return ARM_PCS_AAPCS_LOCAL
;
5566 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5567 sorry ("PCS variant");
5569 /* For everything else we use the target's default. */
5570 return arm_pcs_default
;
5575 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5576 const_tree fntype ATTRIBUTE_UNUSED
,
5577 rtx libcall ATTRIBUTE_UNUSED
,
5578 const_tree fndecl ATTRIBUTE_UNUSED
)
5580 /* Record the unallocated VFP registers. */
5581 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5582 pcum
->aapcs_vfp_reg_alloc
= 0;
5585 /* Walk down the type tree of TYPE counting consecutive base elements.
5586 If *MODEP is VOIDmode, then set it to the first valid floating point
5587 type. If a non-floating point type is found, or if a floating point
5588 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5589 otherwise return the count in the sub-tree. */
5591 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5596 switch (TREE_CODE (type
))
5599 mode
= TYPE_MODE (type
);
5600 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5603 if (*modep
== VOIDmode
)
5612 mode
= TYPE_MODE (TREE_TYPE (type
));
5613 if (mode
!= DFmode
&& mode
!= SFmode
)
5616 if (*modep
== VOIDmode
)
5625 /* Use V2SImode and V4SImode as representatives of all 64-bit
5626 and 128-bit vector types, whether or not those modes are
5627 supported with the present options. */
5628 size
= int_size_in_bytes (type
);
5641 if (*modep
== VOIDmode
)
5644 /* Vector modes are considered to be opaque: two vectors are
5645 equivalent for the purposes of being homogeneous aggregates
5646 if they are the same size. */
5655 tree index
= TYPE_DOMAIN (type
);
5657 /* Can't handle incomplete types nor sizes that are not
5659 if (!COMPLETE_TYPE_P (type
)
5660 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5663 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5666 || !TYPE_MAX_VALUE (index
)
5667 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5668 || !TYPE_MIN_VALUE (index
)
5669 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5673 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5674 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5676 /* There must be no padding. */
5677 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5689 /* Can't handle incomplete types nor sizes that are not
5691 if (!COMPLETE_TYPE_P (type
)
5692 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5695 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5697 if (TREE_CODE (field
) != FIELD_DECL
)
5700 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5706 /* There must be no padding. */
5707 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5714 case QUAL_UNION_TYPE
:
5716 /* These aren't very interesting except in a degenerate case. */
5721 /* Can't handle incomplete types nor sizes that are not
5723 if (!COMPLETE_TYPE_P (type
)
5724 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5727 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5729 if (TREE_CODE (field
) != FIELD_DECL
)
5732 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5735 count
= count
> sub_count
? count
: sub_count
;
5738 /* There must be no padding. */
5739 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5752 /* Return true if PCS_VARIANT should use VFP registers. */
5754 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5756 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5758 static bool seen_thumb1_vfp
= false;
5760 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5762 sorry ("Thumb-1 hard-float VFP ABI");
5763 /* sorry() is not immediately fatal, so only display this once. */
5764 seen_thumb1_vfp
= true;
5770 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5773 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5774 (TARGET_VFP_DOUBLE
|| !is_double
));
5777 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5778 suitable for passing or returning in VFP registers for the PCS
5779 variant selected. If it is, then *BASE_MODE is updated to contain
5780 a machine mode describing each element of the argument's type and
5781 *COUNT to hold the number of such elements. */
5783 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5784 machine_mode mode
, const_tree type
,
5785 machine_mode
*base_mode
, int *count
)
5787 machine_mode new_mode
= VOIDmode
;
5789 /* If we have the type information, prefer that to working things
5790 out from the mode. */
5793 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5795 if (ag_count
> 0 && ag_count
<= 4)
5800 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5801 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5802 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5807 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5810 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5816 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5819 *base_mode
= new_mode
;
5824 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5825 machine_mode mode
, const_tree type
)
5827 int count ATTRIBUTE_UNUSED
;
5828 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5830 if (!use_vfp_abi (pcs_variant
, false))
5832 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5837 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5840 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5843 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5844 &pcum
->aapcs_vfp_rmode
,
5845 &pcum
->aapcs_vfp_rcount
);
5848 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5849 for the behaviour of this function. */
5852 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5853 const_tree type ATTRIBUTE_UNUSED
)
5856 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
5857 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
5858 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5861 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5862 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5864 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5866 || (mode
== TImode
&& ! TARGET_NEON
)
5867 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5870 int rcount
= pcum
->aapcs_vfp_rcount
;
5872 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5876 /* Avoid using unsupported vector modes. */
5877 if (rmode
== V2SImode
)
5879 else if (rmode
== V4SImode
)
5886 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5887 for (i
= 0; i
< rcount
; i
++)
5889 rtx tmp
= gen_rtx_REG (rmode
,
5890 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5891 tmp
= gen_rtx_EXPR_LIST
5893 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5894 XVECEXP (par
, 0, i
) = tmp
;
5897 pcum
->aapcs_reg
= par
;
5900 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5906 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
5907 comment there for the behaviour of this function. */
5910 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5912 const_tree type ATTRIBUTE_UNUSED
)
5914 if (!use_vfp_abi (pcs_variant
, false))
5918 || (GET_MODE_CLASS (mode
) == MODE_INT
5919 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
5923 machine_mode ag_mode
;
5928 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5933 if (ag_mode
== V2SImode
)
5935 else if (ag_mode
== V4SImode
)
5941 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5942 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5943 for (i
= 0; i
< count
; i
++)
5945 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5946 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5947 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5948 XVECEXP (par
, 0, i
) = tmp
;
5954 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5958 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5959 machine_mode mode ATTRIBUTE_UNUSED
,
5960 const_tree type ATTRIBUTE_UNUSED
)
5962 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5963 pcum
->aapcs_vfp_reg_alloc
= 0;
5967 #define AAPCS_CP(X) \
5969 aapcs_ ## X ## _cum_init, \
5970 aapcs_ ## X ## _is_call_candidate, \
5971 aapcs_ ## X ## _allocate, \
5972 aapcs_ ## X ## _is_return_candidate, \
5973 aapcs_ ## X ## _allocate_return_reg, \
5974 aapcs_ ## X ## _advance \
5977 /* Table of co-processors that can be used to pass arguments in
5978 registers. Idealy no arugment should be a candidate for more than
5979 one co-processor table entry, but the table is processed in order
5980 and stops after the first match. If that entry then fails to put
5981 the argument into a co-processor register, the argument will go on
5985 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5986 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5988 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5989 BLKmode) is a candidate for this co-processor's registers; this
5990 function should ignore any position-dependent state in
5991 CUMULATIVE_ARGS and only use call-type dependent information. */
5992 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5994 /* Return true if the argument does get a co-processor register; it
5995 should set aapcs_reg to an RTX of the register allocated as is
5996 required for a return from FUNCTION_ARG. */
5997 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5999 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6000 be returned in this co-processor's registers. */
6001 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6003 /* Allocate and return an RTX element to hold the return type of a call. This
6004 routine must not fail and will only be called if is_return_candidate
6005 returned true with the same parameters. */
6006 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6008 /* Finish processing this argument and prepare to start processing
6010 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6011 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6019 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6024 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6025 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6032 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6034 /* We aren't passed a decl, so we can't check that a call is local.
6035 However, it isn't clear that that would be a win anyway, since it
6036 might limit some tail-calling opportunities. */
6037 enum arm_pcs pcs_variant
;
6041 const_tree fndecl
= NULL_TREE
;
6043 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6046 fntype
= TREE_TYPE (fntype
);
6049 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6052 pcs_variant
= arm_pcs_default
;
6054 if (pcs_variant
!= ARM_PCS_AAPCS
)
6058 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6059 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6068 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6071 /* We aren't passed a decl, so we can't check that a call is local.
6072 However, it isn't clear that that would be a win anyway, since it
6073 might limit some tail-calling opportunities. */
6074 enum arm_pcs pcs_variant
;
6075 int unsignedp ATTRIBUTE_UNUSED
;
6079 const_tree fndecl
= NULL_TREE
;
6081 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6084 fntype
= TREE_TYPE (fntype
);
6087 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6090 pcs_variant
= arm_pcs_default
;
6092 /* Promote integer types. */
6093 if (type
&& INTEGRAL_TYPE_P (type
))
6094 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6096 if (pcs_variant
!= ARM_PCS_AAPCS
)
6100 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6101 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6103 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6107 /* Promotes small structs returned in a register to full-word size
6108 for big-endian AAPCS. */
6109 if (type
&& arm_return_in_msb (type
))
6111 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6112 if (size
% UNITS_PER_WORD
!= 0)
6114 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6115 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
6119 return gen_rtx_REG (mode
, R0_REGNUM
);
6123 aapcs_libcall_value (machine_mode mode
)
6125 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6126 && GET_MODE_SIZE (mode
) <= 4)
6129 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6132 /* Lay out a function argument using the AAPCS rules. The rule
6133 numbers referred to here are those in the AAPCS. */
6135 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6136 const_tree type
, bool named
)
6141 /* We only need to do this once per argument. */
6142 if (pcum
->aapcs_arg_processed
)
6145 pcum
->aapcs_arg_processed
= true;
6147 /* Special case: if named is false then we are handling an incoming
6148 anonymous argument which is on the stack. */
6152 /* Is this a potential co-processor register candidate? */
6153 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6155 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6156 pcum
->aapcs_cprc_slot
= slot
;
6158 /* We don't have to apply any of the rules from part B of the
6159 preparation phase, these are handled elsewhere in the
6164 /* A Co-processor register candidate goes either in its own
6165 class of registers or on the stack. */
6166 if (!pcum
->aapcs_cprc_failed
[slot
])
6168 /* C1.cp - Try to allocate the argument to co-processor
6170 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6173 /* C2.cp - Put the argument on the stack and note that we
6174 can't assign any more candidates in this slot. We also
6175 need to note that we have allocated stack space, so that
6176 we won't later try to split a non-cprc candidate between
6177 core registers and the stack. */
6178 pcum
->aapcs_cprc_failed
[slot
] = true;
6179 pcum
->can_split
= false;
6182 /* We didn't get a register, so this argument goes on the
6184 gcc_assert (pcum
->can_split
== false);
6189 /* C3 - For double-word aligned arguments, round the NCRN up to the
6190 next even number. */
6191 ncrn
= pcum
->aapcs_ncrn
;
6192 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
6195 nregs
= ARM_NUM_REGS2(mode
, type
);
6197 /* Sigh, this test should really assert that nregs > 0, but a GCC
6198 extension allows empty structs and then gives them empty size; it
6199 then allows such a structure to be passed by value. For some of
6200 the code below we have to pretend that such an argument has
6201 non-zero size so that we 'locate' it correctly either in
6202 registers or on the stack. */
6203 gcc_assert (nregs
>= 0);
6205 nregs2
= nregs
? nregs
: 1;
6207 /* C4 - Argument fits entirely in core registers. */
6208 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6210 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6211 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6215 /* C5 - Some core registers left and there are no arguments already
6216 on the stack: split this argument between the remaining core
6217 registers and the stack. */
6218 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6220 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6221 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6222 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6226 /* C6 - NCRN is set to 4. */
6227 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6229 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6233 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6234 for a call to a function whose data type is FNTYPE.
6235 For a library call, FNTYPE is NULL. */
6237 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6239 tree fndecl ATTRIBUTE_UNUSED
)
6241 /* Long call handling. */
6243 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6245 pcum
->pcs_variant
= arm_pcs_default
;
6247 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6249 if (arm_libcall_uses_aapcs_base (libname
))
6250 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6252 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6253 pcum
->aapcs_reg
= NULL_RTX
;
6254 pcum
->aapcs_partial
= 0;
6255 pcum
->aapcs_arg_processed
= false;
6256 pcum
->aapcs_cprc_slot
= -1;
6257 pcum
->can_split
= true;
6259 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6263 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6265 pcum
->aapcs_cprc_failed
[i
] = false;
6266 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6274 /* On the ARM, the offset starts at 0. */
6276 pcum
->iwmmxt_nregs
= 0;
6277 pcum
->can_split
= true;
6279 /* Varargs vectors are treated the same as long long.
6280 named_count avoids having to change the way arm handles 'named' */
6281 pcum
->named_count
= 0;
6284 if (TARGET_REALLY_IWMMXT
&& fntype
)
6288 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6290 fn_arg
= TREE_CHAIN (fn_arg
))
6291 pcum
->named_count
+= 1;
6293 if (! pcum
->named_count
)
6294 pcum
->named_count
= INT_MAX
;
6298 /* Return true if mode/type need doubleword alignment. */
6300 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6303 return PARM_BOUNDARY
< GET_MODE_ALIGNMENT (mode
);
6305 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6306 if (!AGGREGATE_TYPE_P (type
))
6307 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6309 /* Array types: Use member alignment of element type. */
6310 if (TREE_CODE (type
) == ARRAY_TYPE
)
6311 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6313 /* Record/aggregate types: Use greatest member alignment of any member. */
6314 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6315 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6322 /* Determine where to put an argument to a function.
6323 Value is zero to push the argument on the stack,
6324 or a hard register in which to store the argument.
6326 MODE is the argument's machine mode.
6327 TYPE is the data type of the argument (as a tree).
6328 This is null for libcalls where that information may
6330 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6331 the preceding args and about the function being called.
6332 NAMED is nonzero if this argument is a named parameter
6333 (otherwise it is an extra parameter matching an ellipsis).
6335 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6336 other arguments are passed on the stack. If (NAMED == 0) (which happens
6337 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6338 defined), say it is passed in the stack (function_prologue will
6339 indeed make it pass in the stack if necessary). */
6342 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6343 const_tree type
, bool named
)
6345 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6348 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6349 a call insn (op3 of a call_value insn). */
6350 if (mode
== VOIDmode
)
6353 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6355 aapcs_layout_arg (pcum
, mode
, type
, named
);
6356 return pcum
->aapcs_reg
;
6359 /* Varargs vectors are treated the same as long long.
6360 named_count avoids having to change the way arm handles 'named' */
6361 if (TARGET_IWMMXT_ABI
6362 && arm_vector_mode_supported_p (mode
)
6363 && pcum
->named_count
> pcum
->nargs
+ 1)
6365 if (pcum
->iwmmxt_nregs
<= 9)
6366 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6369 pcum
->can_split
= false;
6374 /* Put doubleword aligned quantities in even register pairs. */
6376 && ARM_DOUBLEWORD_ALIGN
6377 && arm_needs_doubleword_align (mode
, type
))
6380 /* Only allow splitting an arg between regs and memory if all preceding
6381 args were allocated to regs. For args passed by reference we only count
6382 the reference pointer. */
6383 if (pcum
->can_split
)
6386 nregs
= ARM_NUM_REGS2 (mode
, type
);
6388 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6391 return gen_rtx_REG (mode
, pcum
->nregs
);
6395 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6397 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
6398 ? DOUBLEWORD_ALIGNMENT
6403 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6404 tree type
, bool named
)
6406 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6407 int nregs
= pcum
->nregs
;
6409 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6411 aapcs_layout_arg (pcum
, mode
, type
, named
);
6412 return pcum
->aapcs_partial
;
6415 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6418 if (NUM_ARG_REGS
> nregs
6419 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6421 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6426 /* Update the data in PCUM to advance over an argument
6427 of mode MODE and data type TYPE.
6428 (TYPE is null for libcalls where that information may not be available.) */
6431 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6432 const_tree type
, bool named
)
6434 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6436 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6438 aapcs_layout_arg (pcum
, mode
, type
, named
);
6440 if (pcum
->aapcs_cprc_slot
>= 0)
6442 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6444 pcum
->aapcs_cprc_slot
= -1;
6447 /* Generic stuff. */
6448 pcum
->aapcs_arg_processed
= false;
6449 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6450 pcum
->aapcs_reg
= NULL_RTX
;
6451 pcum
->aapcs_partial
= 0;
6456 if (arm_vector_mode_supported_p (mode
)
6457 && pcum
->named_count
> pcum
->nargs
6458 && TARGET_IWMMXT_ABI
)
6459 pcum
->iwmmxt_nregs
+= 1;
6461 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6465 /* Variable sized types are passed by reference. This is a GCC
6466 extension to the ARM ABI. */
6469 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6470 machine_mode mode ATTRIBUTE_UNUSED
,
6471 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6473 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6476 /* Encode the current state of the #pragma [no_]long_calls. */
6479 OFF
, /* No #pragma [no_]long_calls is in effect. */
6480 LONG
, /* #pragma long_calls is in effect. */
6481 SHORT
/* #pragma no_long_calls is in effect. */
6484 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6487 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6489 arm_pragma_long_calls
= LONG
;
6493 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6495 arm_pragma_long_calls
= SHORT
;
6499 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6501 arm_pragma_long_calls
= OFF
;
6504 /* Handle an attribute requiring a FUNCTION_DECL;
6505 arguments as in struct attribute_spec.handler. */
6507 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6508 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6510 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6512 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6514 *no_add_attrs
= true;
6520 /* Handle an "interrupt" or "isr" attribute;
6521 arguments as in struct attribute_spec.handler. */
6523 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6528 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6530 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6532 *no_add_attrs
= true;
6534 /* FIXME: the argument if any is checked for type attributes;
6535 should it be checked for decl ones? */
6539 if (TREE_CODE (*node
) == FUNCTION_TYPE
6540 || TREE_CODE (*node
) == METHOD_TYPE
)
6542 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6544 warning (OPT_Wattributes
, "%qE attribute ignored",
6546 *no_add_attrs
= true;
6549 else if (TREE_CODE (*node
) == POINTER_TYPE
6550 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6551 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6552 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6554 *node
= build_variant_type_copy (*node
);
6555 TREE_TYPE (*node
) = build_type_attribute_variant
6557 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6558 *no_add_attrs
= true;
6562 /* Possibly pass this attribute on from the type to a decl. */
6563 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6564 | (int) ATTR_FLAG_FUNCTION_NEXT
6565 | (int) ATTR_FLAG_ARRAY_NEXT
))
6567 *no_add_attrs
= true;
6568 return tree_cons (name
, args
, NULL_TREE
);
6572 warning (OPT_Wattributes
, "%qE attribute ignored",
6581 /* Handle a "pcs" attribute; arguments as in struct
6582 attribute_spec.handler. */
6584 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6585 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6587 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6589 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6590 *no_add_attrs
= true;
6595 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6596 /* Handle the "notshared" attribute. This attribute is another way of
6597 requesting hidden visibility. ARM's compiler supports
6598 "__declspec(notshared)"; we support the same thing via an
6602 arm_handle_notshared_attribute (tree
*node
,
6603 tree name ATTRIBUTE_UNUSED
,
6604 tree args ATTRIBUTE_UNUSED
,
6605 int flags ATTRIBUTE_UNUSED
,
6608 tree decl
= TYPE_NAME (*node
);
6612 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6613 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6614 *no_add_attrs
= false;
6620 /* Return 0 if the attributes for two types are incompatible, 1 if they
6621 are compatible, and 2 if they are nearly compatible (which causes a
6622 warning to be generated). */
6624 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6628 /* Check for mismatch of non-default calling convention. */
6629 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6632 /* Check for mismatched call attributes. */
6633 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6634 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6635 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6636 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6638 /* Only bother to check if an attribute is defined. */
6639 if (l1
| l2
| s1
| s2
)
6641 /* If one type has an attribute, the other must have the same attribute. */
6642 if ((l1
!= l2
) || (s1
!= s2
))
6645 /* Disallow mixed attributes. */
6646 if ((l1
& s2
) || (l2
& s1
))
6650 /* Check for mismatched ISR attribute. */
6651 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6653 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6654 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6656 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6663 /* Assigns default attributes to newly defined type. This is used to
6664 set short_call/long_call attributes for function types of
6665 functions defined inside corresponding #pragma scopes. */
6667 arm_set_default_type_attributes (tree type
)
6669 /* Add __attribute__ ((long_call)) to all functions, when
6670 inside #pragma long_calls or __attribute__ ((short_call)),
6671 when inside #pragma no_long_calls. */
6672 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6674 tree type_attr_list
, attr_name
;
6675 type_attr_list
= TYPE_ATTRIBUTES (type
);
6677 if (arm_pragma_long_calls
== LONG
)
6678 attr_name
= get_identifier ("long_call");
6679 else if (arm_pragma_long_calls
== SHORT
)
6680 attr_name
= get_identifier ("short_call");
6684 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6685 TYPE_ATTRIBUTES (type
) = type_attr_list
;
6689 /* Return true if DECL is known to be linked into section SECTION. */
6692 arm_function_in_section_p (tree decl
, section
*section
)
6694 /* We can only be certain about the prevailing symbol definition. */
6695 if (!decl_binds_to_current_def_p (decl
))
6698 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6699 if (!DECL_SECTION_NAME (decl
))
6701 /* Make sure that we will not create a unique section for DECL. */
6702 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
6706 return function_section (decl
) == section
;
6709 /* Return nonzero if a 32-bit "long_call" should be generated for
6710 a call from the current function to DECL. We generate a long_call
6713 a. has an __attribute__((long call))
6714 or b. is within the scope of a #pragma long_calls
6715 or c. the -mlong-calls command line switch has been specified
6717 However we do not generate a long call if the function:
6719 d. has an __attribute__ ((short_call))
6720 or e. is inside the scope of a #pragma no_long_calls
6721 or f. is defined in the same section as the current function. */
6724 arm_is_long_call_p (tree decl
)
6729 return TARGET_LONG_CALLS
;
6731 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6732 if (lookup_attribute ("short_call", attrs
))
6735 /* For "f", be conservative, and only cater for cases in which the
6736 whole of the current function is placed in the same section. */
6737 if (!flag_reorder_blocks_and_partition
6738 && TREE_CODE (decl
) == FUNCTION_DECL
6739 && arm_function_in_section_p (decl
, current_function_section ()))
6742 if (lookup_attribute ("long_call", attrs
))
6745 return TARGET_LONG_CALLS
;
6748 /* Return nonzero if it is ok to make a tail-call to DECL. */
6750 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6752 unsigned long func_type
;
6754 if (cfun
->machine
->sibcall_blocked
)
6757 /* Never tailcall something if we are generating code for Thumb-1. */
6761 /* The PIC register is live on entry to VxWorks PLT entries, so we
6762 must make the call before restoring the PIC register. */
6763 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
6766 /* If we are interworking and the function is not declared static
6767 then we can't tail-call it unless we know that it exists in this
6768 compilation unit (since it might be a Thumb routine). */
6769 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6770 && !TREE_ASM_WRITTEN (decl
))
6773 func_type
= arm_current_func_type ();
6774 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6775 if (IS_INTERRUPT (func_type
))
6778 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6780 /* Check that the return value locations are the same. For
6781 example that we aren't returning a value from the sibling in
6782 a VFP register but then need to transfer it to a core
6785 tree decl_or_type
= decl
;
6787 /* If it is an indirect function pointer, get the function type. */
6789 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
6791 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
6792 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6794 if (!rtx_equal_p (a
, b
))
6798 /* Never tailcall if function may be called with a misaligned SP. */
6799 if (IS_STACKALIGN (func_type
))
6802 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6803 references should become a NOP. Don't convert such calls into
6805 if (TARGET_AAPCS_BASED
6806 && arm_abi
== ARM_ABI_AAPCS
6808 && DECL_WEAK (decl
))
6811 /* Everything else is ok. */
6816 /* Addressing mode support functions. */
6818 /* Return nonzero if X is a legitimate immediate operand when compiling
6819 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6821 legitimate_pic_operand_p (rtx x
)
6823 if (GET_CODE (x
) == SYMBOL_REF
6824 || (GET_CODE (x
) == CONST
6825 && GET_CODE (XEXP (x
, 0)) == PLUS
6826 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6832 /* Record that the current function needs a PIC register. Initialize
6833 cfun->machine->pic_reg if we have not already done so. */
6836 require_pic_register (void)
6838 /* A lot of the logic here is made obscure by the fact that this
6839 routine gets called as part of the rtx cost estimation process.
6840 We don't want those calls to affect any assumptions about the real
6841 function; and further, we can't call entry_of_function() until we
6842 start the real expansion process. */
6843 if (!crtl
->uses_pic_offset_table
)
6845 gcc_assert (can_create_pseudo_p ());
6846 if (arm_pic_register
!= INVALID_REGNUM
6847 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6849 if (!cfun
->machine
->pic_reg
)
6850 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6852 /* Play games to avoid marking the function as needing pic
6853 if we are being called as part of the cost-estimation
6855 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6856 crtl
->uses_pic_offset_table
= 1;
6860 rtx_insn
*seq
, *insn
;
6862 if (!cfun
->machine
->pic_reg
)
6863 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6865 /* Play games to avoid marking the function as needing pic
6866 if we are being called as part of the cost-estimation
6868 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6870 crtl
->uses_pic_offset_table
= 1;
6873 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6874 && arm_pic_register
> LAST_LO_REGNUM
)
6875 emit_move_insn (cfun
->machine
->pic_reg
,
6876 gen_rtx_REG (Pmode
, arm_pic_register
));
6878 arm_load_pic_register (0UL);
6883 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6885 INSN_LOCATION (insn
) = prologue_location
;
6887 /* We can be called during expansion of PHI nodes, where
6888 we can't yet emit instructions directly in the final
6889 insn stream. Queue the insns on the entry edge, they will
6890 be committed after everything else is expanded. */
6891 insert_insn_on_edge (seq
,
6892 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6899 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
6901 if (GET_CODE (orig
) == SYMBOL_REF
6902 || GET_CODE (orig
) == LABEL_REF
)
6908 gcc_assert (can_create_pseudo_p ());
6909 reg
= gen_reg_rtx (Pmode
);
6912 /* VxWorks does not impose a fixed gap between segments; the run-time
6913 gap can be different from the object-file gap. We therefore can't
6914 use GOTOFF unless we are absolutely sure that the symbol is in the
6915 same segment as the GOT. Unfortunately, the flexibility of linker
6916 scripts means that we can't be sure of that in general, so assume
6917 that GOTOFF is never valid on VxWorks. */
6918 if ((GET_CODE (orig
) == LABEL_REF
6919 || (GET_CODE (orig
) == SYMBOL_REF
&&
6920 SYMBOL_REF_LOCAL_P (orig
)))
6922 && arm_pic_data_is_text_relative
)
6923 insn
= arm_pic_static_addr (orig
, reg
);
6929 /* If this function doesn't have a pic register, create one now. */
6930 require_pic_register ();
6932 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6934 /* Make the MEM as close to a constant as possible. */
6935 mem
= SET_SRC (pat
);
6936 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6937 MEM_READONLY_P (mem
) = 1;
6938 MEM_NOTRAP_P (mem
) = 1;
6940 insn
= emit_insn (pat
);
6943 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6945 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6949 else if (GET_CODE (orig
) == CONST
)
6953 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6954 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6957 /* Handle the case where we have: const (UNSPEC_TLS). */
6958 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6959 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6962 /* Handle the case where we have:
6963 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6965 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6966 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6967 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6969 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6975 gcc_assert (can_create_pseudo_p ());
6976 reg
= gen_reg_rtx (Pmode
);
6979 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6981 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6982 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6983 base
== reg
? 0 : reg
);
6985 if (CONST_INT_P (offset
))
6987 /* The base register doesn't really matter, we only want to
6988 test the index for the appropriate mode. */
6989 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6991 gcc_assert (can_create_pseudo_p ());
6992 offset
= force_reg (Pmode
, offset
);
6995 if (CONST_INT_P (offset
))
6996 return plus_constant (Pmode
, base
, INTVAL (offset
));
6999 if (GET_MODE_SIZE (mode
) > 4
7000 && (GET_MODE_CLASS (mode
) == MODE_INT
7001 || TARGET_SOFT_FLOAT
))
7003 emit_insn (gen_addsi3 (reg
, base
, offset
));
7007 return gen_rtx_PLUS (Pmode
, base
, offset
);
7014 /* Find a spare register to use during the prolog of a function. */
7017 thumb_find_work_register (unsigned long pushed_regs_mask
)
7021 /* Check the argument registers first as these are call-used. The
7022 register allocation order means that sometimes r3 might be used
7023 but earlier argument registers might not, so check them all. */
7024 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7025 if (!df_regs_ever_live_p (reg
))
7028 /* Before going on to check the call-saved registers we can try a couple
7029 more ways of deducing that r3 is available. The first is when we are
7030 pushing anonymous arguments onto the stack and we have less than 4
7031 registers worth of fixed arguments(*). In this case r3 will be part of
7032 the variable argument list and so we can be sure that it will be
7033 pushed right at the start of the function. Hence it will be available
7034 for the rest of the prologue.
7035 (*): ie crtl->args.pretend_args_size is greater than 0. */
7036 if (cfun
->machine
->uses_anonymous_args
7037 && crtl
->args
.pretend_args_size
> 0)
7038 return LAST_ARG_REGNUM
;
7040 /* The other case is when we have fixed arguments but less than 4 registers
7041 worth. In this case r3 might be used in the body of the function, but
7042 it is not being used to convey an argument into the function. In theory
7043 we could just check crtl->args.size to see how many bytes are
7044 being passed in argument registers, but it seems that it is unreliable.
7045 Sometimes it will have the value 0 when in fact arguments are being
7046 passed. (See testcase execute/20021111-1.c for an example). So we also
7047 check the args_info.nregs field as well. The problem with this field is
7048 that it makes no allowances for arguments that are passed to the
7049 function but which are not used. Hence we could miss an opportunity
7050 when a function has an unused argument in r3. But it is better to be
7051 safe than to be sorry. */
7052 if (! cfun
->machine
->uses_anonymous_args
7053 && crtl
->args
.size
>= 0
7054 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7055 && (TARGET_AAPCS_BASED
7056 ? crtl
->args
.info
.aapcs_ncrn
< 4
7057 : crtl
->args
.info
.nregs
< 4))
7058 return LAST_ARG_REGNUM
;
7060 /* Otherwise look for a call-saved register that is going to be pushed. */
7061 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7062 if (pushed_regs_mask
& (1 << reg
))
7067 /* Thumb-2 can use high regs. */
7068 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7069 if (pushed_regs_mask
& (1 << reg
))
7072 /* Something went wrong - thumb_compute_save_reg_mask()
7073 should have arranged for a suitable register to be pushed. */
7077 static GTY(()) int pic_labelno
;
7079 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7083 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7085 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7087 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7090 gcc_assert (flag_pic
);
7092 pic_reg
= cfun
->machine
->pic_reg
;
7093 if (TARGET_VXWORKS_RTP
)
7095 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7096 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7097 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7099 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7101 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7102 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7106 /* We use an UNSPEC rather than a LABEL_REF because this label
7107 never appears in the code stream. */
7109 labelno
= GEN_INT (pic_labelno
++);
7110 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7111 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7113 /* On the ARM the PC register contains 'dot + 8' at the time of the
7114 addition, on the Thumb it is 'dot + 4'. */
7115 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7116 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7118 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7122 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7124 else /* TARGET_THUMB1 */
7126 if (arm_pic_register
!= INVALID_REGNUM
7127 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7129 /* We will have pushed the pic register, so we should always be
7130 able to find a work register. */
7131 pic_tmp
= gen_rtx_REG (SImode
,
7132 thumb_find_work_register (saved_regs
));
7133 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7134 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7135 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7137 else if (arm_pic_register
!= INVALID_REGNUM
7138 && arm_pic_register
> LAST_LO_REGNUM
7139 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7141 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7142 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7143 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7146 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7150 /* Need to emit this whether or not we obey regdecls,
7151 since setjmp/longjmp can cause life info to screw up. */
7155 /* Generate code to load the address of a static var when flag_pic is set. */
7157 arm_pic_static_addr (rtx orig
, rtx reg
)
7159 rtx l1
, labelno
, offset_rtx
, insn
;
7161 gcc_assert (flag_pic
);
7163 /* We use an UNSPEC rather than a LABEL_REF because this label
7164 never appears in the code stream. */
7165 labelno
= GEN_INT (pic_labelno
++);
7166 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7167 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7169 /* On the ARM the PC register contains 'dot + 8' at the time of the
7170 addition, on the Thumb it is 'dot + 4'. */
7171 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7172 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7173 UNSPEC_SYMBOL_OFFSET
);
7174 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7176 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7180 /* Return nonzero if X is valid as an ARM state addressing register. */
7182 arm_address_register_rtx_p (rtx x
, int strict_p
)
7192 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7194 return (regno
<= LAST_ARM_REGNUM
7195 || regno
>= FIRST_PSEUDO_REGISTER
7196 || regno
== FRAME_POINTER_REGNUM
7197 || regno
== ARG_POINTER_REGNUM
);
7200 /* Return TRUE if this rtx is the difference of a symbol and a label,
7201 and will reduce to a PC-relative relocation in the object file.
7202 Expressions like this can be left alone when generating PIC, rather
7203 than forced through the GOT. */
7205 pcrel_constant_p (rtx x
)
7207 if (GET_CODE (x
) == MINUS
)
7208 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7213 /* Return true if X will surely end up in an index register after next
7216 will_be_in_index_register (const_rtx x
)
7218 /* arm.md: calculate_pic_address will split this into a register. */
7219 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7222 /* Return nonzero if X is a valid ARM state address operand. */
7224 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7228 enum rtx_code code
= GET_CODE (x
);
7230 if (arm_address_register_rtx_p (x
, strict_p
))
7233 use_ldrd
= (TARGET_LDRD
7235 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
7237 if (code
== POST_INC
|| code
== PRE_DEC
7238 || ((code
== PRE_INC
|| code
== POST_DEC
)
7239 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7240 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7242 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7243 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7244 && GET_CODE (XEXP (x
, 1)) == PLUS
7245 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7247 rtx addend
= XEXP (XEXP (x
, 1), 1);
7249 /* Don't allow ldrd post increment by register because it's hard
7250 to fixup invalid register choices. */
7252 && GET_CODE (x
) == POST_MODIFY
7256 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7257 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7260 /* After reload constants split into minipools will have addresses
7261 from a LABEL_REF. */
7262 else if (reload_completed
7263 && (code
== LABEL_REF
7265 && GET_CODE (XEXP (x
, 0)) == PLUS
7266 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7267 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7270 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7273 else if (code
== PLUS
)
7275 rtx xop0
= XEXP (x
, 0);
7276 rtx xop1
= XEXP (x
, 1);
7278 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7279 && ((CONST_INT_P (xop1
)
7280 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7281 || (!strict_p
&& will_be_in_index_register (xop1
))))
7282 || (arm_address_register_rtx_p (xop1
, strict_p
)
7283 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7287 /* Reload currently can't handle MINUS, so disable this for now */
7288 else if (GET_CODE (x
) == MINUS
)
7290 rtx xop0
= XEXP (x
, 0);
7291 rtx xop1
= XEXP (x
, 1);
7293 return (arm_address_register_rtx_p (xop0
, strict_p
)
7294 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7298 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7299 && code
== SYMBOL_REF
7300 && CONSTANT_POOL_ADDRESS_P (x
)
7302 && symbol_mentioned_p (get_pool_constant (x
))
7303 && ! pcrel_constant_p (get_pool_constant (x
))))
7309 /* Return nonzero if X is a valid Thumb-2 address operand. */
7311 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7314 enum rtx_code code
= GET_CODE (x
);
7316 if (arm_address_register_rtx_p (x
, strict_p
))
7319 use_ldrd
= (TARGET_LDRD
7321 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
7323 if (code
== POST_INC
|| code
== PRE_DEC
7324 || ((code
== PRE_INC
|| code
== POST_DEC
)
7325 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7326 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7328 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7329 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7330 && GET_CODE (XEXP (x
, 1)) == PLUS
7331 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7333 /* Thumb-2 only has autoincrement by constant. */
7334 rtx addend
= XEXP (XEXP (x
, 1), 1);
7335 HOST_WIDE_INT offset
;
7337 if (!CONST_INT_P (addend
))
7340 offset
= INTVAL(addend
);
7341 if (GET_MODE_SIZE (mode
) <= 4)
7342 return (offset
> -256 && offset
< 256);
7344 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7345 && (offset
& 3) == 0);
7348 /* After reload constants split into minipools will have addresses
7349 from a LABEL_REF. */
7350 else if (reload_completed
7351 && (code
== LABEL_REF
7353 && GET_CODE (XEXP (x
, 0)) == PLUS
7354 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7355 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7358 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7361 else if (code
== PLUS
)
7363 rtx xop0
= XEXP (x
, 0);
7364 rtx xop1
= XEXP (x
, 1);
7366 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7367 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7368 || (!strict_p
&& will_be_in_index_register (xop1
))))
7369 || (arm_address_register_rtx_p (xop1
, strict_p
)
7370 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7373 /* Normally we can assign constant values to target registers without
7374 the help of constant pool. But there are cases we have to use constant
7376 1) assign a label to register.
7377 2) sign-extend a 8bit value to 32bit and then assign to register.
7379 Constant pool access in format:
7380 (set (reg r0) (mem (symbol_ref (".LC0"))))
7381 will cause the use of literal pool (later in function arm_reorg).
7382 So here we mark such format as an invalid format, then the compiler
7383 will adjust it into:
7384 (set (reg r0) (symbol_ref (".LC0")))
7385 (set (reg r0) (mem (reg r0))).
7386 No extra register is required, and (mem (reg r0)) won't cause the use
7387 of literal pools. */
7388 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
7389 && CONSTANT_POOL_ADDRESS_P (x
))
7392 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7393 && code
== SYMBOL_REF
7394 && CONSTANT_POOL_ADDRESS_P (x
)
7396 && symbol_mentioned_p (get_pool_constant (x
))
7397 && ! pcrel_constant_p (get_pool_constant (x
))))
7403 /* Return nonzero if INDEX is valid for an address index operand in
7406 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7409 HOST_WIDE_INT range
;
7410 enum rtx_code code
= GET_CODE (index
);
7412 /* Standard coprocessor addressing modes. */
7413 if (TARGET_HARD_FLOAT
7415 && (mode
== SFmode
|| mode
== DFmode
))
7416 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7417 && INTVAL (index
) > -1024
7418 && (INTVAL (index
) & 3) == 0);
7420 /* For quad modes, we restrict the constant offset to be slightly less
7421 than what the instruction format permits. We do this because for
7422 quad mode moves, we will actually decompose them into two separate
7423 double-mode reads or writes. INDEX must therefore be a valid
7424 (double-mode) offset and so should INDEX+8. */
7425 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7426 return (code
== CONST_INT
7427 && INTVAL (index
) < 1016
7428 && INTVAL (index
) > -1024
7429 && (INTVAL (index
) & 3) == 0);
7431 /* We have no such constraint on double mode offsets, so we permit the
7432 full range of the instruction format. */
7433 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7434 return (code
== CONST_INT
7435 && INTVAL (index
) < 1024
7436 && INTVAL (index
) > -1024
7437 && (INTVAL (index
) & 3) == 0);
7439 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7440 return (code
== CONST_INT
7441 && INTVAL (index
) < 1024
7442 && INTVAL (index
) > -1024
7443 && (INTVAL (index
) & 3) == 0);
7445 if (arm_address_register_rtx_p (index
, strict_p
)
7446 && (GET_MODE_SIZE (mode
) <= 4))
7449 if (mode
== DImode
|| mode
== DFmode
)
7451 if (code
== CONST_INT
)
7453 HOST_WIDE_INT val
= INTVAL (index
);
7456 return val
> -256 && val
< 256;
7458 return val
> -4096 && val
< 4092;
7461 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7464 if (GET_MODE_SIZE (mode
) <= 4
7468 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7472 rtx xiop0
= XEXP (index
, 0);
7473 rtx xiop1
= XEXP (index
, 1);
7475 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7476 && power_of_two_operand (xiop1
, SImode
))
7477 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7478 && power_of_two_operand (xiop0
, SImode
)));
7480 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7481 || code
== ASHIFT
|| code
== ROTATERT
)
7483 rtx op
= XEXP (index
, 1);
7485 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7488 && INTVAL (op
) <= 31);
7492 /* For ARM v4 we may be doing a sign-extend operation during the
7498 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7504 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7506 return (code
== CONST_INT
7507 && INTVAL (index
) < range
7508 && INTVAL (index
) > -range
);
7511 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7512 index operand. i.e. 1, 2, 4 or 8. */
7514 thumb2_index_mul_operand (rtx op
)
7518 if (!CONST_INT_P (op
))
7522 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7525 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7527 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7529 enum rtx_code code
= GET_CODE (index
);
7531 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7532 /* Standard coprocessor addressing modes. */
7533 if (TARGET_HARD_FLOAT
7535 && (mode
== SFmode
|| mode
== DFmode
))
7536 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7537 /* Thumb-2 allows only > -256 index range for it's core register
7538 load/stores. Since we allow SF/DF in core registers, we have
7539 to use the intersection between -256~4096 (core) and -1024~1024
7541 && INTVAL (index
) > -256
7542 && (INTVAL (index
) & 3) == 0);
7544 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7546 /* For DImode assume values will usually live in core regs
7547 and only allow LDRD addressing modes. */
7548 if (!TARGET_LDRD
|| mode
!= DImode
)
7549 return (code
== CONST_INT
7550 && INTVAL (index
) < 1024
7551 && INTVAL (index
) > -1024
7552 && (INTVAL (index
) & 3) == 0);
7555 /* For quad modes, we restrict the constant offset to be slightly less
7556 than what the instruction format permits. We do this because for
7557 quad mode moves, we will actually decompose them into two separate
7558 double-mode reads or writes. INDEX must therefore be a valid
7559 (double-mode) offset and so should INDEX+8. */
7560 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7561 return (code
== CONST_INT
7562 && INTVAL (index
) < 1016
7563 && INTVAL (index
) > -1024
7564 && (INTVAL (index
) & 3) == 0);
7566 /* We have no such constraint on double mode offsets, so we permit the
7567 full range of the instruction format. */
7568 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7569 return (code
== CONST_INT
7570 && INTVAL (index
) < 1024
7571 && INTVAL (index
) > -1024
7572 && (INTVAL (index
) & 3) == 0);
7574 if (arm_address_register_rtx_p (index
, strict_p
)
7575 && (GET_MODE_SIZE (mode
) <= 4))
7578 if (mode
== DImode
|| mode
== DFmode
)
7580 if (code
== CONST_INT
)
7582 HOST_WIDE_INT val
= INTVAL (index
);
7583 /* ??? Can we assume ldrd for thumb2? */
7584 /* Thumb-2 ldrd only has reg+const addressing modes. */
7585 /* ldrd supports offsets of +-1020.
7586 However the ldr fallback does not. */
7587 return val
> -256 && val
< 256 && (val
& 3) == 0;
7595 rtx xiop0
= XEXP (index
, 0);
7596 rtx xiop1
= XEXP (index
, 1);
7598 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7599 && thumb2_index_mul_operand (xiop1
))
7600 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7601 && thumb2_index_mul_operand (xiop0
)));
7603 else if (code
== ASHIFT
)
7605 rtx op
= XEXP (index
, 1);
7607 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7610 && INTVAL (op
) <= 3);
7613 return (code
== CONST_INT
7614 && INTVAL (index
) < 4096
7615 && INTVAL (index
) > -256);
7618 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7620 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
7630 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7632 return (regno
<= LAST_LO_REGNUM
7633 || regno
> LAST_VIRTUAL_REGISTER
7634 || regno
== FRAME_POINTER_REGNUM
7635 || (GET_MODE_SIZE (mode
) >= 4
7636 && (regno
== STACK_POINTER_REGNUM
7637 || regno
>= FIRST_PSEUDO_REGISTER
7638 || x
== hard_frame_pointer_rtx
7639 || x
== arg_pointer_rtx
)));
7642 /* Return nonzero if x is a legitimate index register. This is the case
7643 for any base register that can access a QImode object. */
7645 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
7647 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
7650 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7652 The AP may be eliminated to either the SP or the FP, so we use the
7653 least common denominator, e.g. SImode, and offsets from 0 to 64.
7655 ??? Verify whether the above is the right approach.
7657 ??? Also, the FP may be eliminated to the SP, so perhaps that
7658 needs special handling also.
7660 ??? Look at how the mips16 port solves this problem. It probably uses
7661 better ways to solve some of these problems.
7663 Although it is not incorrect, we don't accept QImode and HImode
7664 addresses based on the frame pointer or arg pointer until the
7665 reload pass starts. This is so that eliminating such addresses
7666 into stack based ones won't produce impossible code. */
7668 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7670 /* ??? Not clear if this is right. Experiment. */
7671 if (GET_MODE_SIZE (mode
) < 4
7672 && !(reload_in_progress
|| reload_completed
)
7673 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7674 || reg_mentioned_p (arg_pointer_rtx
, x
)
7675 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7676 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7677 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7678 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7681 /* Accept any base register. SP only in SImode or larger. */
7682 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7685 /* This is PC relative data before arm_reorg runs. */
7686 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7687 && GET_CODE (x
) == SYMBOL_REF
7688 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7691 /* This is PC relative data after arm_reorg runs. */
7692 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7694 && (GET_CODE (x
) == LABEL_REF
7695 || (GET_CODE (x
) == CONST
7696 && GET_CODE (XEXP (x
, 0)) == PLUS
7697 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7698 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7701 /* Post-inc indexing only supported for SImode and larger. */
7702 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7703 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7706 else if (GET_CODE (x
) == PLUS
)
7708 /* REG+REG address can be any two index registers. */
7709 /* We disallow FRAME+REG addressing since we know that FRAME
7710 will be replaced with STACK, and SP relative addressing only
7711 permits SP+OFFSET. */
7712 if (GET_MODE_SIZE (mode
) <= 4
7713 && XEXP (x
, 0) != frame_pointer_rtx
7714 && XEXP (x
, 1) != frame_pointer_rtx
7715 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7716 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7717 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7720 /* REG+const has 5-7 bit offset for non-SP registers. */
7721 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7722 || XEXP (x
, 0) == arg_pointer_rtx
)
7723 && CONST_INT_P (XEXP (x
, 1))
7724 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7727 /* REG+const has 10-bit offset for SP, but only SImode and
7728 larger is supported. */
7729 /* ??? Should probably check for DI/DFmode overflow here
7730 just like GO_IF_LEGITIMATE_OFFSET does. */
7731 else if (REG_P (XEXP (x
, 0))
7732 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7733 && GET_MODE_SIZE (mode
) >= 4
7734 && CONST_INT_P (XEXP (x
, 1))
7735 && INTVAL (XEXP (x
, 1)) >= 0
7736 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7737 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7740 else if (REG_P (XEXP (x
, 0))
7741 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7742 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7743 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7744 && REGNO (XEXP (x
, 0))
7745 <= LAST_VIRTUAL_POINTER_REGISTER
))
7746 && GET_MODE_SIZE (mode
) >= 4
7747 && CONST_INT_P (XEXP (x
, 1))
7748 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7752 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7753 && GET_MODE_SIZE (mode
) == 4
7754 && GET_CODE (x
) == SYMBOL_REF
7755 && CONSTANT_POOL_ADDRESS_P (x
)
7757 && symbol_mentioned_p (get_pool_constant (x
))
7758 && ! pcrel_constant_p (get_pool_constant (x
))))
7764 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7765 instruction of mode MODE. */
7767 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
7769 switch (GET_MODE_SIZE (mode
))
7772 return val
>= 0 && val
< 32;
7775 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7779 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7785 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
7788 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7789 else if (TARGET_THUMB2
)
7790 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7791 else /* if (TARGET_THUMB1) */
7792 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7795 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7797 Given an rtx X being reloaded into a reg required to be
7798 in class CLASS, return the class of reg to actually use.
7799 In general this is just CLASS, but for the Thumb core registers and
7800 immediate constants we prefer a LO_REGS class or a subset. */
7803 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7809 if (rclass
== GENERAL_REGS
)
7816 /* Build the SYMBOL_REF for __tls_get_addr. */
7818 static GTY(()) rtx tls_get_addr_libfunc
;
7821 get_tls_get_addr (void)
7823 if (!tls_get_addr_libfunc
)
7824 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7825 return tls_get_addr_libfunc
;
7829 arm_load_tp (rtx target
)
7832 target
= gen_reg_rtx (SImode
);
7836 /* Can return in any reg. */
7837 emit_insn (gen_load_tp_hard (target
));
7841 /* Always returned in r0. Immediately copy the result into a pseudo,
7842 otherwise other uses of r0 (e.g. setting up function arguments) may
7843 clobber the value. */
7847 emit_insn (gen_load_tp_soft ());
7849 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
7850 emit_move_insn (target
, tmp
);
7856 load_tls_operand (rtx x
, rtx reg
)
7860 if (reg
== NULL_RTX
)
7861 reg
= gen_reg_rtx (SImode
);
7863 tmp
= gen_rtx_CONST (SImode
, x
);
7865 emit_move_insn (reg
, tmp
);
7871 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7873 rtx insns
, label
, labelno
, sum
;
7875 gcc_assert (reloc
!= TLS_DESCSEQ
);
7878 labelno
= GEN_INT (pic_labelno
++);
7879 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7880 label
= gen_rtx_CONST (VOIDmode
, label
);
7882 sum
= gen_rtx_UNSPEC (Pmode
,
7883 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7884 GEN_INT (TARGET_ARM
? 8 : 4)),
7886 reg
= load_tls_operand (sum
, reg
);
7889 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7891 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7893 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7894 LCT_PURE
, /* LCT_CONST? */
7895 Pmode
, 1, reg
, Pmode
);
7897 insns
= get_insns ();
/* Emit the GNU2 (descriptor-based) TLS sequence for symbol X.  Builds
   the TLS_DESCSEQ UNSPEC sum in r0, emits the tlscall, then copies
   the result out of r0 into a fresh pseudo REG (the assert checks the
   pseudo did not land on r0).  NOTE(review): braces and the final
   `return reg;` are missing from this extract.  */
7904 arm_tls_descseq_addr (rtx x
, rtx reg
)
7906 rtx labelno
= GEN_INT (pic_labelno
++);
7907 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7908 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7909 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7910 gen_rtx_CONST (VOIDmode
, label
),
7911 GEN_INT (!TARGET_ARM
)),
/* The descriptor call takes its argument in r0.  */
7913 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
7915 emit_insn (gen_tlscall (x
, labelno
));
7917 reg
= gen_reg_rtx (SImode
);
7919 gcc_assert (REGNO (reg
) != R0_REGNUM
);
7921 emit_move_insn (reg
, reg0
);
/* Legitimize the TLS address X according to its SYMBOL_REF_TLS_MODEL,
   optionally reusing REG.  Visible cases: global-dynamic and
   local-dynamic (each with a GNU2 descriptor variant and the classic
   __tls_get_addr call), initial-exec and local-exec.  NOTE(review):
   the `switch (model)` head, braces, several returns and the default
   case were dropped in extraction — the case labels below only make
   sense against the upstream text.  */
7927 legitimize_tls_address (rtx x
, rtx reg
)
7929 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7930 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7934 case TLS_MODEL_GLOBAL_DYNAMIC
:
7935 if (TARGET_GNU2_TLS
)
/* GNU2 descriptor sequence: address = thread pointer + descseq result.  */
7937 reg
= arm_tls_descseq_addr (x
, reg
);
7939 tp
= arm_load_tp (NULL_RTX
);
7941 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7945 /* Original scheme */
7946 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7947 dest
= gen_reg_rtx (Pmode
);
7948 emit_libcall_block (insns
, dest
, ret
, x
);
7952 case TLS_MODEL_LOCAL_DYNAMIC
:
7953 if (TARGET_GNU2_TLS
)
7955 reg
= arm_tls_descseq_addr (x
, reg
);
7957 tp
= arm_load_tp (NULL_RTX
);
7959 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7963 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7965 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7966 share the LDM result with other LD model accesses. */
7967 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7969 dest
= gen_reg_rtx (Pmode
);
7970 emit_libcall_block (insns
, dest
, ret
, eqv
);
7972 /* Load the addend. */
7973 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7974 GEN_INT (TLS_LDO32
)),
7976 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7977 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7981 case TLS_MODEL_INITIAL_EXEC
:
/* Build a pic-label sum, load the GOT entry, add the thread pointer.  */
7982 labelno
= GEN_INT (pic_labelno
++);
7983 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7984 label
= gen_rtx_CONST (VOIDmode
, label
);
7985 sum
= gen_rtx_UNSPEC (Pmode
,
7986 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7987 GEN_INT (TARGET_ARM
? 8 : 4)),
7989 reg
= load_tls_operand (sum
, reg
);
7992 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7993 else if (TARGET_THUMB2
)
7994 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
))
;
7997 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7998 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8001 tp
= arm_load_tp (NULL_RTX
);
8003 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8005 case TLS_MODEL_LOCAL_EXEC
:
/* Local-exec: offset is a link-time constant added to the thread pointer.  */
8006 tp
= arm_load_tp (NULL_RTX
);
8008 reg
= gen_rtx_UNSPEC (Pmode
,
8009 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8011 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8013 return gen_rtx_PLUS (Pmode
, tp
, reg
);
/* Implement TARGET_LEGITIMIZE_ADDRESS (ARM/Thumb-2 path; Thumb-1 is
   delegated to thumb_legitimize_address).  Handles TLS references,
   PLUS/MINUS operands that need forcing into registers, splitting
   large constant offsets into base + small index, and finally PIC
   symbol/label references.  NOTE(review): many structural lines
   (braces, `if (TARGET_THUMB1)`, offset-range tests around line 8080,
   the flag_pic test and final return) are missing from this extract.  */
8020 /* Try machine-dependent ways of modifying an illegitimate address
8021 to be legitimate. If we find one, return the new, valid address. */
8023 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8025 if (arm_tls_referenced_p (x
))
/* Peel a constant addend off a (const (plus sym addend)) wrapper.  */
8029 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8031 addend
= XEXP (XEXP (x
, 0), 1);
8032 x
= XEXP (XEXP (x
, 0), 0);
8035 if (GET_CODE (x
) != SYMBOL_REF
)
8038 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8040 x
= legitimize_tls_address (x
, NULL_RTX
);
8044 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8053 /* TODO: legitimize_address for Thumb2. */
8056 return thumb_legitimize_address (x
, orig_x
, mode
);
8059 if (GET_CODE (x
) == PLUS
)
8061 rtx xop0
= XEXP (x
, 0);
8062 rtx xop1
= XEXP (x
, 1);
/* Force non-symbolic constant operands into registers.  */
8064 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8065 xop0
= force_reg (SImode
, xop0
);
8067 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8068 && !symbol_mentioned_p (xop1
))
8069 xop1
= force_reg (SImode
, xop1
);
8071 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8072 && CONST_INT_P (xop1
))
8074 HOST_WIDE_INT n
, low_n
;
8078 /* VFP addressing modes actually allow greater offsets, but for
8079 now we just stick with the lowest common denominator. */
8081 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
/* Keep only the low 12 bits in the index; move the rest into the base.  */
8093 low_n
= ((mode
) == TImode
? 0
8094 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8098 base_reg
= gen_reg_rtx (SImode
);
8099 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8100 emit_move_insn (base_reg
, val
);
8101 x
= plus_constant (Pmode
, base_reg
, low_n
);
8103 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8104 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8107 /* XXX We don't allow MINUS any more -- see comment in
8108 arm_legitimate_address_outer_p (). */
8109 else if (GET_CODE (x
) == MINUS
)
8111 rtx xop0
= XEXP (x
, 0);
8112 rtx xop1
= XEXP (x
, 1);
8114 if (CONSTANT_P (xop0
))
8115 xop0
= force_reg (SImode
, xop0
);
8117 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8118 xop1
= force_reg (SImode
, xop1
);
8120 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8121 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8124 /* Make sure to take full advantage of the pre-indexed addressing mode
8125 with absolute addresses which often allows for the base register to
8126 be factorized for multiple adjacent memory references, and it might
8127 even allows for the mini pool to be avoided entirely. */
8128 else if (CONST_INT_P (x
) && optimize
> 0)
8131 HOST_WIDE_INT mask
, base
, index
;
8134 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8135 use a 8-bit index. So let's use a 12-bit index for SImode only and
8136 hope that arm_gen_constant will enable ldrb to use more bits. */
8137 bits
= (mode
== SImode
) ? 12 : 8;
8138 mask
= (1 << bits
) - 1;
8139 base
= INTVAL (x
) & ~mask
;
8140 index
= INTVAL (x
) & mask
;
8141 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8143 /* It'll most probably be more efficient to generate the base
8144 with more bits set and use a negative index instead. */
8148 base_reg
= force_reg (SImode
, GEN_INT (base
));
8149 x
= plus_constant (Pmode
, base_reg
, index
);
8154 /* We need to find and carefully transform any SYMBOL and LABEL
8155 references; so go back to the original address expression. */
8156 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8158 if (new_x
!= orig_x
)
/* Thumb-1 counterpart of arm_legitimize_address.  Splits register +
   large-constant sums into a biased base plus a small in-range offset
   (avoided under optimize_size per the visible comment... NOTE(review):
   the condition reads `if (optimize_size && ...)` below, which takes
   the fold WHEN optimizing for size, apparently contradicting the
   comment — a dropped `!` is plausible; confirm against upstream),
   forces non-register operands into registers, and finally falls back
   to PIC legitimization.  */
8166 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8167 to be legitimate. If we find one, return the new, valid address. */
8169 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8171 if (GET_CODE (x
) == PLUS
8172 && CONST_INT_P (XEXP (x
, 1))
8173 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8174 || INTVAL (XEXP (x
, 1)) < 0))
8176 rtx xop0
= XEXP (x
, 0);
8177 rtx xop1
= XEXP (x
, 1);
8178 HOST_WIDE_INT offset
= INTVAL (xop1
);
8180 /* Try and fold the offset into a biasing of the base register and
8181 then offsetting that. Don't do this when optimizing for space
8182 since it can cause too many CSEs. */
8183 if (optimize_size
&& offset
>= 0
8184 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8186 HOST_WIDE_INT delta
;
8189 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8190 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8191 delta
= 31 * GET_MODE_SIZE (mode
);
8193 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8195 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8197 x
= plus_constant (Pmode
, xop0
, delta
);
8199 else if (offset
< 0 && offset
> -256)
8200 /* Small negative offsets are best done with a subtract before the
8201 dereference, forcing these into a register normally takes two
8203 x
= force_operand (x
, NULL_RTX
);
8206 /* For the remaining cases, force the constant into a register. */
8207 xop1
= force_reg (SImode
, xop1
);
8208 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8211 else if (GET_CODE (x
) == PLUS
8212 && s_register_operand (XEXP (x
, 1), SImode
)
8213 && !s_register_operand (XEXP (x
, 0), SImode
))
8215 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8217 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8222 /* We need to find and carefully transform any SYMBOL and LABEL
8223 references; so go back to the original address expression. */
8224 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8226 if (new_x
!= orig_x
)
/* Walk every sub-rtx of X looking for a SYMBOL_REF with a non-zero
   TLS model; UNSPEC_TLS subtrees are skipped because they hold TLS
   offsets, not real symbol references.  Short-circuits to false when
   the target has no TLS support.  NOTE(review): the `return false;`
   / `return true;` lines were dropped in extraction.  */
8233 /* Return TRUE if X contains any TLS symbol references. */
8236 arm_tls_referenced_p (rtx x
)
8238 if (! TARGET_HAVE_TLS
)
8241 subrtx_iterator::array_type array
;
8242 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8244 const_rtx x
= *iter
;
8245 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8248 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8249 TLS offsets, not real symbol references. */
8250 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8251 iter
.skip_subrtxes ();
/* ARM-state helper for TARGET_LEGITIMATE_CONSTANT_P: everything is
   legitimate except label references when not generating PIC.  */
8256 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8258 On the ARM, allow any integer (invalid ones are removed later by insn
8259 patterns), nice doubles and symbol_refs which refer to the function's
8262 When generating pic allow anything. */
8265 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8267 return flag_pic
|| !label_mentioned_p (x
);
/* Thumb-state helper for TARGET_LEGITIMATE_CONSTANT_P: integers,
   doubles, constant addresses, plus further alternatives lost in
   extraction (NOTE(review): the closing disjunct of the return is
   missing — confirm against upstream).  */
8271 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8273 return (CONST_INT_P (x
)
8274 || CONST_DOUBLE_P (x
)
8275 || CONSTANT_ADDRESS_P (x
)
/* Implement TARGET_LEGITIMATE_CONSTANT_P: reject anything that must
   not be forced into the constant pool, then dispatch to the ARM or
   Thumb helper (the selecting condition was dropped in extraction).  */
8280 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8282 return (!arm_cannot_force_const_mem (mode
, x
)
8284 ? arm_legitimate_constant_p_1 (mode
, x
)
8285 : thumb_legitimate_constant_p (mode
, x
)));
/* Implement TARGET_CANNOT_FORCE_CONST_MEM: disallow pooling constants
   that are symbol+offset pairs pointing outside their object when
   offsets must stay within sections, and anything containing TLS
   references.  NOTE(review): variable declarations and the `return
   true;` inside the section check were dropped in extraction.  */
8288 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8291 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8295 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8297 split_const (x
, &base
, &offset
);
8298 if (GET_CODE (base
) == SYMBOL_REF
8299 && !offset_within_block_p (base
, INTVAL (offset
)))
8302 return arm_tls_referenced_p (x
);
/* True when X is a REG or a SUBREG of a REG (first macro); the second
   strips the SUBREG to yield the underlying REG.  NOTE(review): the
   first line of REG_OR_SUBREG_REG's expansion (the plain REG_P test)
   was dropped in extraction.  */
8305 #define REG_OR_SUBREG_REG(X) \
8307 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8309 #define REG_OR_SUBREG_RTX(X) \
8310 (REG_P (X) ? (X) : SUBREG_REG (X))
/* Speed-oriented RTX cost estimates for Thumb-1, keyed on the rtx
   code of X with OUTER giving the containing operation.  The costs
   are expressed in COSTS_N_INSNS units with ad-hoc adjustments (the
   embedded "XXX another guess" comments are original).  NOTE(review):
   the `switch (code)` head, most case labels and many returns were
   dropped in extraction — the fragments below are the case bodies.  */
8313 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8315 machine_mode mode
= GET_MODE (x
);
8324 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8331 return COSTS_N_INSNS (1);
8334 if (CONST_INT_P (XEXP (x
, 1)))
8337 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8344 return COSTS_N_INSNS (2) + cycles
;
8346 return COSTS_N_INSNS (1) + 16;
8349 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8351 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8352 return (COSTS_N_INSNS (words
)
8353 + 4 * ((MEM_P (SET_SRC (x
)))
8354 + MEM_P (SET_DEST (x
))));
8359 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8361 if (thumb_shiftable_const (INTVAL (x
)))
8362 return COSTS_N_INSNS (2);
8363 return COSTS_N_INSNS (3);
8365 else if ((outer
== PLUS
|| outer
== COMPARE
)
8366 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8368 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8369 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8370 return COSTS_N_INSNS (1);
8371 else if (outer
== AND
)
8374 /* This duplicates the tests in the andsi3 expander. */
8375 for (i
= 9; i
<= 31; i
++)
8376 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8377 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8378 return COSTS_N_INSNS (2);
8380 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8381 || outer
== LSHIFTRT
)
8383 return COSTS_N_INSNS (2);
8389 return COSTS_N_INSNS (3);
8407 /* XXX another guess. */
8408 /* Memory costs quite a lot for the first word, but subsequent words
8409 load at the equivalent of a single insn each. */
8410 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8411 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8416 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
/* Extension costs: DImode results need an extra insn up front.  */
8422 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8423 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8429 return total
+ COSTS_N_INSNS (1);
8431 /* Assume a two-shift sequence. Increase the cost slightly so
8432 we prefer actual shifts over an extend operation. */
8433 return total
+ 1 + COSTS_N_INSNS (2);
/* Generic (non-CPU-specific) ARM RTX cost function, used as a
   fallback by the per-CPU cost hooks.  Writes the estimated cost of
   X (whose containing code is OUTER) into *TOTAL and returns whether
   the caller should recurse (returns dropped in extraction).  SPEED
   selects speed vs. size costing on the recursive rtx_cost calls.
   NOTE(review): this extract lost the `switch (code)` head, many case
   labels, braces and return statements; the fragments below are the
   surviving case bodies and must be read against upstream arm.c.  */
8441 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8443 machine_mode mode
= GET_MODE (x
);
8444 enum rtx_code subcode
;
8446 enum rtx_code code
= GET_CODE (x
);
8452 /* Memory costs quite a lot for the first word, but subsequent words
8453 load at the equivalent of a single insn each. */
8454 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8461 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8462 *total
= COSTS_N_INSNS (2);
8463 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8464 *total
= COSTS_N_INSNS (4);
8466 *total
= COSTS_N_INSNS (20);
8470 if (REG_P (XEXP (x
, 1)))
8471 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8472 else if (!CONST_INT_P (XEXP (x
, 1)))
8473 *total
= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8479 *total
+= COSTS_N_INSNS (4);
8484 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8485 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8488 *total
+= COSTS_N_INSNS (3);
8492 *total
+= COSTS_N_INSNS (1);
8493 /* Increase the cost of complex shifts because they aren't any faster,
8494 and reduce dual issue opportunities. */
8495 if (arm_tune_cortex_a9
8496 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8504 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8505 if (CONST_INT_P (XEXP (x
, 0))
8506 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8508 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8512 if (CONST_INT_P (XEXP (x
, 1))
8513 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8515 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8522 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8524 if (TARGET_HARD_FLOAT
8526 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8528 *total
= COSTS_N_INSNS (1);
8529 if (CONST_DOUBLE_P (XEXP (x
, 0))
8530 && arm_const_double_rtx (XEXP (x
, 0)))
8532 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8536 if (CONST_DOUBLE_P (XEXP (x
, 1))
8537 && arm_const_double_rtx (XEXP (x
, 1)))
8539 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8545 *total
= COSTS_N_INSNS (20);
8549 *total
= COSTS_N_INSNS (1);
8550 if (CONST_INT_P (XEXP (x
, 0))
8551 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8553 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8557 subcode
= GET_CODE (XEXP (x
, 1));
8558 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8559 || subcode
== LSHIFTRT
8560 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8562 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8563 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, subcode
, 0, speed
);
8567 /* A shift as a part of RSB costs no more than RSB itself. */
8568 if (GET_CODE (XEXP (x
, 0)) == MULT
8569 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8571 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
, 0, speed
);
8572 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8577 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8579 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8580 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, subcode
, 0, speed
);
8584 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8585 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8587 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
8589 if (REG_P (XEXP (XEXP (x
, 1), 0))
8590 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8591 *total
+= COSTS_N_INSNS (1);
8599 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8600 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8601 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8603 *total
= COSTS_N_INSNS (1);
8604 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
8605 GET_CODE (XEXP (x
, 0)), 0, speed
);
8606 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8610 /* MLA: All arguments must be registers. We filter out
8611 multiplication by a power of two, so that we fall down into
8613 if (GET_CODE (XEXP (x
, 0)) == MULT
8614 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8616 /* The cost comes from the cost of the multiply. */
8620 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8622 if (TARGET_HARD_FLOAT
8624 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8626 *total
= COSTS_N_INSNS (1);
8627 if (CONST_DOUBLE_P (XEXP (x
, 1))
8628 && arm_const_double_rtx (XEXP (x
, 1)))
8630 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8637 *total
= COSTS_N_INSNS (20);
8641 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8642 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8644 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), mode
, code
,
8646 if (REG_P (XEXP (XEXP (x
, 0), 0))
8647 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8648 *total
+= COSTS_N_INSNS (1);
8654 case AND
: case XOR
: case IOR
:
8656 /* Normally the frame registers will be spilt into reg+const during
8657 reload, so it is a bad idea to combine them with other instructions,
8658 since then they might not be moved outside of loops. As a compromise
8659 we allow integration with ops that have a constant as their second
8661 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8662 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8663 && !CONST_INT_P (XEXP (x
, 1)))
8664 *total
= COSTS_N_INSNS (1);
8668 *total
+= COSTS_N_INSNS (2);
8669 if (CONST_INT_P (XEXP (x
, 1))
8670 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8672 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8679 *total
+= COSTS_N_INSNS (1);
8680 if (CONST_INT_P (XEXP (x
, 1))
8681 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8683 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8686 subcode
= GET_CODE (XEXP (x
, 0));
8687 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8688 || subcode
== LSHIFTRT
8689 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8691 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8692 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8697 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8699 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8700 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8704 if (subcode
== UMIN
|| subcode
== UMAX
8705 || subcode
== SMIN
|| subcode
== SMAX
)
8707 *total
= COSTS_N_INSNS (3);
8714 /* This should have been handled by the CPU specific routines. */
/* Recognise [us]mull-style (widening multiply) high-part patterns.  */
8718 if (arm_arch3m
&& mode
== SImode
8719 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8720 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8721 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8722 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8723 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8724 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8726 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, LSHIFTRT
,
8730 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8734 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8736 if (TARGET_HARD_FLOAT
8738 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8740 *total
= COSTS_N_INSNS (1);
8743 *total
= COSTS_N_INSNS (2);
8749 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8750 if (mode
== SImode
&& code
== NOT
)
8752 subcode
= GET_CODE (XEXP (x
, 0));
8753 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8754 || subcode
== LSHIFTRT
8755 || subcode
== ROTATE
|| subcode
== ROTATERT
8757 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8759 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
,
8761 /* Register shifts cost an extra cycle. */
8762 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8763 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8773 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8775 *total
= COSTS_N_INSNS (4);
8779 operand
= XEXP (x
, 0);
8781 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8782 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8783 && REG_P (XEXP (operand
, 0))
8784 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8785 *total
+= COSTS_N_INSNS (1);
8786 *total
+= rtx_cost (XEXP (x
, 1), VOIDmode
, code
, 1, speed
);
8787 *total
+= rtx_cost (XEXP (x
, 2), VOIDmode
, code
, 2, speed
);
8791 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8793 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
8800 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8801 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8803 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
8810 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8811 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8813 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
8834 /* SCC insns. In the case where the comparison has already been
8835 performed, then they cost 2 instructions. Otherwise they need
8836 an additional comparison before them. */
8837 *total
= COSTS_N_INSNS (2);
8838 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8845 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8851 *total
+= COSTS_N_INSNS (1);
8852 if (CONST_INT_P (XEXP (x
, 1))
8853 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8855 *total
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed
);
8859 subcode
= GET_CODE (XEXP (x
, 0));
8860 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8861 || subcode
== LSHIFTRT
8862 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8864 mode
= GET_MODE (XEXP (x
, 0));
8865 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8866 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8871 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8873 mode
= GET_MODE (XEXP (x
, 0));
8874 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8875 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8885 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8886 if (!CONST_INT_P (XEXP (x
, 1))
8887 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8888 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8892 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8894 if (TARGET_HARD_FLOAT
8896 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8898 *total
= COSTS_N_INSNS (1);
8901 *total
= COSTS_N_INSNS (20);
8904 *total
= COSTS_N_INSNS (1);
8906 *total
+= COSTS_N_INSNS (3);
8912 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8914 rtx op
= XEXP (x
, 0);
8915 machine_mode opmode
= GET_MODE (op
);
8918 *total
+= COSTS_N_INSNS (1);
8920 if (opmode
!= SImode
)
8924 /* If !arm_arch4, we use one of the extendhisi2_mem
8925 or movhi_bytes patterns for HImode. For a QImode
8926 sign extension, we first zero-extend from memory
8927 and then perform a shift sequence. */
8928 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8929 *total
+= COSTS_N_INSNS (2);
8932 *total
+= COSTS_N_INSNS (1);
8934 /* We don't have the necessary insn, so we need to perform some
8936 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8937 /* An and with constant 255. */
8938 *total
+= COSTS_N_INSNS (1);
8940 /* A shift sequence. Increase costs slightly to avoid
8941 combining two shifts into an extend operation. */
8942 *total
+= COSTS_N_INSNS (2) + 1;
8948 switch (GET_MODE (XEXP (x
, 0)))
8955 *total
= COSTS_N_INSNS (1);
8965 mode
= GET_MODE (XEXP (x
, 0));
8966 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8970 if (const_ok_for_arm (INTVAL (x
))
8971 || const_ok_for_arm (~INTVAL (x
)))
8972 *total
= COSTS_N_INSNS (1);
8974 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8975 INTVAL (x
), NULL_RTX
,
8982 *total
= COSTS_N_INSNS (3);
8986 *total
= COSTS_N_INSNS (1);
8990 *total
= COSTS_N_INSNS (1);
8991 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8995 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8996 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8997 *total
= COSTS_N_INSNS (1);
8999 *total
= COSTS_N_INSNS (4);
9003 /* The vec_extract patterns accept memory operands that require an
9004 address reload. Account for the cost of that reload to give the
9005 auto-inc-dec pass an incentive to try to replace them. */
9006 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
9007 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
9009 mode
= GET_MODE (SET_DEST (x
));
9010 *total
= rtx_cost (SET_DEST (x
), mode
, code
, 0, speed
);
9011 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
9012 *total
+= COSTS_N_INSNS (1);
9015 /* Likewise for the vec_set patterns. */
9016 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
9017 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
9018 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
9020 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
9021 mode
= GET_MODE (SET_DEST (x
));
9022 *total
= rtx_cost (mem
, mode
, code
, 0, speed
);
9023 if (!neon_vector_mem_operand (mem
, 2, true))
9024 *total
+= COSTS_N_INSNS (1);
9030 /* We cost this as high as our memory costs to allow this to
9031 be hoisted from loops. */
9032 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
9034 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
9040 && TARGET_HARD_FLOAT
9042 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9043 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9044 *total
= COSTS_N_INSNS (1);
9046 *total
= COSTS_N_INSNS (4);
9050 *total
= COSTS_N_INSNS (4);
/* Size-oriented cost estimates for Thumb-1 instructions; per the
   original comment most of the structure is copied from
   thumb1_rtx_costs.  NOTE(review): the `switch (code)` head, most
   case labels and several returns were dropped in extraction.  */
9055 /* Estimates the size cost of thumb1 instructions.
9056 For now most of the code is copied from thumb1_rtx_costs. We need more
9057 fine grain tuning when we have more related test cases. */
9059 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9061 machine_mode mode
= GET_MODE (x
);
9070 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9074 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9075 defined by RTL expansion, especially for the expansion of
9077 if ((GET_CODE (XEXP (x
, 0)) == MULT
9078 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
9079 || (GET_CODE (XEXP (x
, 1)) == MULT
9080 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
9081 return COSTS_N_INSNS (2);
9082 /* On purpose fall through for normal RTX. */
9086 return COSTS_N_INSNS (1);
9089 if (CONST_INT_P (XEXP (x
, 1)))
9091 /* Thumb1 mul instruction can't operate on const. We must Load it
9092 into a register first. */
9093 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
9094 /* For the targets which have a very small and high-latency multiply
9095 unit, we prefer to synthesize the mult with up to 5 instructions,
9096 giving a good balance between size and performance. */
9097 if (arm_arch6m
&& arm_m_profile_small_mul
)
9098 return COSTS_N_INSNS (5);
9100 return COSTS_N_INSNS (1) + const_size
;
9102 return COSTS_N_INSNS (1);
9105 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9107 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9108 return COSTS_N_INSNS (words
)
9109 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x
))
9110 || satisfies_constraint_K (SET_SRC (x
))
9111 /* thumb1_movdi_insn. */
9112 || ((words
> 1) && MEM_P (SET_SRC (x
))));
9117 if (UINTVAL (x
) < 256)
9118 return COSTS_N_INSNS (1);
9119 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9120 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9121 return COSTS_N_INSNS (2);
9122 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9123 if (thumb_shiftable_const (INTVAL (x
)))
9124 return COSTS_N_INSNS (2);
9125 return COSTS_N_INSNS (3);
9127 else if ((outer
== PLUS
|| outer
== COMPARE
)
9128 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9130 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9131 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9132 return COSTS_N_INSNS (1);
9133 else if (outer
== AND
)
9136 /* This duplicates the tests in the andsi3 expander. */
9137 for (i
= 9; i
<= 31; i
++)
9138 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9139 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9140 return COSTS_N_INSNS (2);
9142 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9143 || outer
== LSHIFTRT
)
9145 return COSTS_N_INSNS (2);
9151 return COSTS_N_INSNS (3);
9165 return COSTS_N_INSNS (1);
9168 return (COSTS_N_INSNS (1)
9170 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9171 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9172 ? COSTS_N_INSNS (1) : 0));
9176 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9181 /* XXX still guessing. */
9182 switch (GET_MODE (XEXP (x
, 0)))
9185 return (1 + (mode
== DImode
? 4 : 0)
9186 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9189 return (4 + (mode
== DImode
? 4 : 0)
9190 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9193 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9204 /* RTX costs when optimizing for size. */
9206 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9209 machine_mode mode
= GET_MODE (x
);
9212 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9216 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9220 /* A memory access costs 1 insn if the mode is small, or the address is
9221 a single register, otherwise it costs one insn per word. */
9222 if (REG_P (XEXP (x
, 0)))
9223 *total
= COSTS_N_INSNS (1);
9225 && GET_CODE (XEXP (x
, 0)) == PLUS
9226 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9227 /* This will be split into two instructions.
9228 See arm.md:calculate_pic_address. */
9229 *total
= COSTS_N_INSNS (2);
9231 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9238 /* Needs a libcall, so it costs about this. */
9239 *total
= COSTS_N_INSNS (2);
9243 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9245 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
9254 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9256 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), mode
, code
,
9260 else if (mode
== SImode
)
9262 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
9264 /* Slightly disparage register shifts, but not by much. */
9265 if (!CONST_INT_P (XEXP (x
, 1)))
9266 *total
+= 1 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, false);
9270 /* Needs a libcall. */
9271 *total
= COSTS_N_INSNS (2);
9275 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9276 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9278 *total
= COSTS_N_INSNS (1);
9284 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
9285 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
9287 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
9288 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
9289 || subcode1
== ROTATE
|| subcode1
== ROTATERT
9290 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
9291 || subcode1
== ASHIFTRT
)
9293 /* It's just the cost of the two operands. */
9298 *total
= COSTS_N_INSNS (1);
9302 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9306 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9307 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9309 *total
= COSTS_N_INSNS (1);
9313 /* A shift as a part of ADD costs nothing. */
9314 if (GET_CODE (XEXP (x
, 0)) == MULT
9315 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
9317 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
9318 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
, 0, false);
9319 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, false);
9324 case AND
: case XOR
: case IOR
:
9327 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9329 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9330 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9331 || (code
== AND
&& subcode
== NOT
))
9333 /* It's just the cost of the two operands. */
9339 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9343 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9347 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9348 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9350 *total
= COSTS_N_INSNS (1);
9356 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9365 if (cc_register (XEXP (x
, 0), VOIDmode
))
9368 *total
= COSTS_N_INSNS (1);
9372 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9373 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9374 *total
= COSTS_N_INSNS (1);
9376 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9381 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9384 if (const_ok_for_arm (INTVAL (x
)))
9385 /* A multiplication by a constant requires another instruction
9386 to load the constant to a register. */
9387 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9389 else if (const_ok_for_arm (~INTVAL (x
)))
9390 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9391 else if (const_ok_for_arm (-INTVAL (x
)))
9393 if (outer_code
== COMPARE
|| outer_code
== PLUS
9394 || outer_code
== MINUS
)
9397 *total
= COSTS_N_INSNS (1);
9400 *total
= COSTS_N_INSNS (2);
9406 *total
= COSTS_N_INSNS (2);
9410 *total
= COSTS_N_INSNS (4);
9415 && TARGET_HARD_FLOAT
9416 && outer_code
== SET
9417 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9418 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9419 *total
= COSTS_N_INSNS (1);
9421 *total
= COSTS_N_INSNS (4);
9426 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9427 cost of these slightly. */
9428 *total
= COSTS_N_INSNS (1) + 1;
9435 if (mode
!= VOIDmode
)
9436 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9438 *total
= COSTS_N_INSNS (4); /* Who knows? */
9443 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9444 operand, then return the operand that is being shifted. If the shift
9445 is not by a constant, then set SHIFT_REG to point to the operand.
9446 Return NULL if OP is not a shifter operand. */
9448 shifter_op_p (rtx op
, rtx
*shift_reg
)
9450 enum rtx_code code
= GET_CODE (op
);
9452 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9453 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9454 return XEXP (op
, 0);
9455 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9456 return XEXP (op
, 0);
9457 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9458 || code
== ASHIFTRT
)
9460 if (!CONST_INT_P (XEXP (op
, 1)))
9461 *shift_reg
= XEXP (op
, 1);
9462 return XEXP (op
, 0);
9469 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9471 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9472 rtx_code code
= GET_CODE (x
);
9473 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9475 switch (XINT (x
, 1))
9477 case UNSPEC_UNALIGNED_LOAD
:
9478 /* We can only do unaligned loads into the integer unit, and we can't
9480 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9482 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9483 + extra_cost
->ldst
.load_unaligned
);
9486 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9487 ADDR_SPACE_GENERIC
, speed_p
);
9491 case UNSPEC_UNALIGNED_STORE
:
9492 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9494 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9495 + extra_cost
->ldst
.store_unaligned
);
9497 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9499 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9500 ADDR_SPACE_GENERIC
, speed_p
);
9511 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9515 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.
   NB: the expansion references a variable named `speed_p' that must be
   in scope at each use site.  */
#define LIBCALL_COST(N) COSTS_N_INSNS ((N) + (speed_p ? 18 : 2))
9525 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9528 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9529 if (shift_op != NULL \
9530 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9535 *cost += extra_cost->alu.arith_shift_reg; \
9536 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9537 ASHIFT, 1, speed_p); \
9540 *cost += extra_cost->alu.arith_shift; \
9542 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9543 ASHIFT, 0, speed_p) \
9544 + rtx_cost (XEXP (x, 1 - IDX), \
9545 GET_MODE (shift_op), \
9552 /* RTX costs. Make an estimate of the cost of executing the operation
9553 X, which is contained within an operation with code OUTER_CODE.
9554 SPEED_P indicates whether the cost desired is the performance cost,
9555 or the size cost. The estimate is stored in COST and the return
9556 value is TRUE if the cost calculation is final, or FALSE if the
9557 caller should recurse through the operands of X to add additional
9560 We currently make no attempt to model the size savings of Thumb-2
9561 16-bit instructions. At the normal points in compilation where
9562 this code is called we have no measure of whether the condition
9563 flags are live or not, and thus no realistic way to determine what
9564 the size will eventually be. */
9566 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9567 const struct cpu_cost_table
*extra_cost
,
9568 int *cost
, bool speed_p
)
9570 machine_mode mode
= GET_MODE (x
);
9572 *cost
= COSTS_N_INSNS (1);
9577 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9579 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9587 /* SET RTXs don't have a mode so we get it from the destination. */
9588 mode
= GET_MODE (SET_DEST (x
));
9590 if (REG_P (SET_SRC (x
))
9591 && REG_P (SET_DEST (x
)))
9593 /* Assume that most copies can be done with a single insn,
9594 unless we don't have HW FP, in which case everything
9595 larger than word mode will require two insns. */
9596 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9597 && GET_MODE_SIZE (mode
) > 4)
9600 /* Conditional register moves can be encoded
9601 in 16 bits in Thumb mode. */
9602 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9608 if (CONST_INT_P (SET_SRC (x
)))
9610 /* Handle CONST_INT here, since the value doesn't have a mode
9611 and we would otherwise be unable to work out the true cost. */
9612 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9615 /* Slightly lower the cost of setting a core reg to a constant.
9616 This helps break up chains and allows for better scheduling. */
9617 if (REG_P (SET_DEST (x
))
9618 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9621 /* Immediate moves with an immediate in the range [0, 255] can be
9622 encoded in 16 bits in Thumb mode. */
9623 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9624 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9626 goto const_int_cost
;
9632 /* A memory access costs 1 insn if the mode is small, or the address is
9633 a single register, otherwise it costs one insn per word. */
9634 if (REG_P (XEXP (x
, 0)))
9635 *cost
= COSTS_N_INSNS (1);
9637 && GET_CODE (XEXP (x
, 0)) == PLUS
9638 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9639 /* This will be split into two instructions.
9640 See arm.md:calculate_pic_address. */
9641 *cost
= COSTS_N_INSNS (2);
9643 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9645 /* For speed optimizations, add the costs of the address and
9646 accessing memory. */
9649 *cost
+= (extra_cost
->ldst
.load
9650 + arm_address_cost (XEXP (x
, 0), mode
,
9651 ADDR_SPACE_GENERIC
, speed_p
));
9653 *cost
+= extra_cost
->ldst
.load
;
9659 /* Calculations of LDM costs are complex. We assume an initial cost
9660 (ldm_1st) which will load the number of registers mentioned in
9661 ldm_regs_per_insn_1st registers; then each additional
9662 ldm_regs_per_insn_subsequent registers cost one more insn. The
9663 formula for N regs is thus:
9665 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9666 + ldm_regs_per_insn_subsequent - 1)
9667 / ldm_regs_per_insn_subsequent).
9669 Additional costs may also be added for addressing. A similar
9670 formula is used for STM. */
9672 bool is_ldm
= load_multiple_operation (x
, SImode
);
9673 bool is_stm
= store_multiple_operation (x
, SImode
);
9675 if (is_ldm
|| is_stm
)
9679 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9680 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9681 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9682 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9683 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9684 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9685 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9687 *cost
+= regs_per_insn_1st
9688 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9689 + regs_per_insn_sub
- 1)
9690 / regs_per_insn_sub
);
9699 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9700 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9701 *cost
+= COSTS_N_INSNS (speed_p
9702 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9703 else if (mode
== SImode
&& TARGET_IDIV
)
9704 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9706 *cost
= LIBCALL_COST (2);
9707 return false; /* All arguments must be in registers. */
9710 /* MOD by a power of 2 can be expanded as:
9712 and r0, r0, #(n - 1)
9713 and r1, r1, #(n - 1)
9714 rsbpl r0, r1, #0. */
9715 if (CONST_INT_P (XEXP (x
, 1))
9716 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9719 *cost
+= COSTS_N_INSNS (3);
9722 *cost
+= 2 * extra_cost
->alu
.logical
9723 + extra_cost
->alu
.arith
;
9729 *cost
= LIBCALL_COST (2);
9730 return false; /* All arguments must be in registers. */
9733 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9735 *cost
+= (COSTS_N_INSNS (1)
9736 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9738 *cost
+= extra_cost
->alu
.shift_reg
;
9746 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9748 *cost
+= (COSTS_N_INSNS (2)
9749 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9751 *cost
+= 2 * extra_cost
->alu
.shift
;
9754 else if (mode
== SImode
)
9756 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9757 /* Slightly disparage register shifts at -Os, but not by much. */
9758 if (!CONST_INT_P (XEXP (x
, 1)))
9759 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9760 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9763 else if (GET_MODE_CLASS (mode
) == MODE_INT
9764 && GET_MODE_SIZE (mode
) < 4)
9768 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9769 /* Slightly disparage register shifts at -Os, but not by
9771 if (!CONST_INT_P (XEXP (x
, 1)))
9772 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9773 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9775 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9777 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9779 /* Can use SBFX/UBFX. */
9781 *cost
+= extra_cost
->alu
.bfx
;
9782 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9786 *cost
+= COSTS_N_INSNS (1);
9787 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9790 if (CONST_INT_P (XEXP (x
, 1)))
9791 *cost
+= 2 * extra_cost
->alu
.shift
;
9793 *cost
+= (extra_cost
->alu
.shift
9794 + extra_cost
->alu
.shift_reg
);
9797 /* Slightly disparage register shifts. */
9798 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9803 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9804 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9807 if (CONST_INT_P (XEXP (x
, 1)))
9808 *cost
+= (2 * extra_cost
->alu
.shift
9809 + extra_cost
->alu
.log_shift
);
9811 *cost
+= (extra_cost
->alu
.shift
9812 + extra_cost
->alu
.shift_reg
9813 + extra_cost
->alu
.log_shift_reg
);
9819 *cost
= LIBCALL_COST (2);
9828 *cost
+= extra_cost
->alu
.rev
;
9835 /* No rev instruction available. Look at arm_legacy_rev
9836 and thumb_legacy_rev for the form of RTL used then. */
9839 *cost
+= COSTS_N_INSNS (9);
9843 *cost
+= 6 * extra_cost
->alu
.shift
;
9844 *cost
+= 3 * extra_cost
->alu
.logical
;
9849 *cost
+= COSTS_N_INSNS (4);
9853 *cost
+= 2 * extra_cost
->alu
.shift
;
9854 *cost
+= extra_cost
->alu
.arith_shift
;
9855 *cost
+= 2 * extra_cost
->alu
.logical
;
9863 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9864 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9866 if (GET_CODE (XEXP (x
, 0)) == MULT
9867 || GET_CODE (XEXP (x
, 1)) == MULT
)
9869 rtx mul_op0
, mul_op1
, sub_op
;
9872 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9874 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9876 mul_op0
= XEXP (XEXP (x
, 0), 0);
9877 mul_op1
= XEXP (XEXP (x
, 0), 1);
9878 sub_op
= XEXP (x
, 1);
9882 mul_op0
= XEXP (XEXP (x
, 1), 0);
9883 mul_op1
= XEXP (XEXP (x
, 1), 1);
9884 sub_op
= XEXP (x
, 0);
9887 /* The first operand of the multiply may be optionally
9889 if (GET_CODE (mul_op0
) == NEG
)
9890 mul_op0
= XEXP (mul_op0
, 0);
9892 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9893 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9894 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9900 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9906 rtx shift_by_reg
= NULL
;
9910 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9911 if (shift_op
== NULL
)
9913 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9914 non_shift_op
= XEXP (x
, 0);
9917 non_shift_op
= XEXP (x
, 1);
9919 if (shift_op
!= NULL
)
9921 if (shift_by_reg
!= NULL
)
9924 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9925 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9928 *cost
+= extra_cost
->alu
.arith_shift
;
9930 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9931 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9936 && GET_CODE (XEXP (x
, 1)) == MULT
)
9940 *cost
+= extra_cost
->mult
[0].add
;
9941 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9942 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9943 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9947 if (CONST_INT_P (XEXP (x
, 0)))
9949 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9950 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9952 *cost
= COSTS_N_INSNS (insns
);
9954 *cost
+= insns
* extra_cost
->alu
.arith
;
9955 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9959 *cost
+= extra_cost
->alu
.arith
;
9964 if (GET_MODE_CLASS (mode
) == MODE_INT
9965 && GET_MODE_SIZE (mode
) < 4)
9967 rtx shift_op
, shift_reg
;
9970 /* We check both sides of the MINUS for shifter operands since,
9971 unlike PLUS, it's not commutative. */
9973 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9974 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9976 /* Slightly disparage, as we might need to widen the result. */
9979 *cost
+= extra_cost
->alu
.arith
;
9981 if (CONST_INT_P (XEXP (x
, 0)))
9983 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9992 *cost
+= COSTS_N_INSNS (1);
9994 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9996 rtx op1
= XEXP (x
, 1);
9999 *cost
+= 2 * extra_cost
->alu
.arith
;
10001 if (GET_CODE (op1
) == ZERO_EXTEND
)
10002 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
10005 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10006 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10010 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10013 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
10014 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
10016 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
10019 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10020 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
10023 *cost
+= (extra_cost
->alu
.arith
10024 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10025 ? extra_cost
->alu
.arith
10026 : extra_cost
->alu
.arith_shift
));
10027 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
10028 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10029 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
10034 *cost
+= 2 * extra_cost
->alu
.arith
;
10040 *cost
= LIBCALL_COST (2);
10044 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10045 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10047 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10049 rtx mul_op0
, mul_op1
, add_op
;
10052 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10054 mul_op0
= XEXP (XEXP (x
, 0), 0);
10055 mul_op1
= XEXP (XEXP (x
, 0), 1);
10056 add_op
= XEXP (x
, 1);
10058 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10059 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10060 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
10066 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10069 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10071 *cost
= LIBCALL_COST (2);
10075 /* Narrow modes can be synthesized in SImode, but the range
10076 of useful sub-operations is limited. Check for shift operations
10077 on one of the operands. Only left shifts can be used in the
10079 if (GET_MODE_CLASS (mode
) == MODE_INT
10080 && GET_MODE_SIZE (mode
) < 4)
10082 rtx shift_op
, shift_reg
;
10085 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
10087 if (CONST_INT_P (XEXP (x
, 1)))
10089 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10090 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10092 *cost
= COSTS_N_INSNS (insns
);
10094 *cost
+= insns
* extra_cost
->alu
.arith
;
10095 /* Slightly penalize a narrow operation as the result may
10097 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10101 /* Slightly penalize a narrow operation as the result may
10105 *cost
+= extra_cost
->alu
.arith
;
10110 if (mode
== SImode
)
10112 rtx shift_op
, shift_reg
;
10114 if (TARGET_INT_SIMD
10115 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10116 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10118 /* UXTA[BH] or SXTA[BH]. */
10120 *cost
+= extra_cost
->alu
.extend_arith
;
10121 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10123 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
10128 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10129 if (shift_op
!= NULL
)
10134 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10135 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10138 *cost
+= extra_cost
->alu
.arith_shift
;
10140 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10141 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10144 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10146 rtx mul_op
= XEXP (x
, 0);
10148 if (TARGET_DSP_MULTIPLY
10149 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
10150 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10151 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10152 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10153 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
10154 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
10155 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
10156 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
10157 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10158 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10159 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10160 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
10163 /* SMLA[BT][BT]. */
10165 *cost
+= extra_cost
->mult
[0].extend_add
;
10166 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
10167 SIGN_EXTEND
, 0, speed_p
)
10168 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
10169 SIGN_EXTEND
, 0, speed_p
)
10170 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10175 *cost
+= extra_cost
->mult
[0].add
;
10176 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
10177 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
10178 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10181 if (CONST_INT_P (XEXP (x
, 1)))
10183 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10184 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10186 *cost
= COSTS_N_INSNS (insns
);
10188 *cost
+= insns
* extra_cost
->alu
.arith
;
10189 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10193 *cost
+= extra_cost
->alu
.arith
;
10198 if (mode
== DImode
)
10201 && GET_CODE (XEXP (x
, 0)) == MULT
10202 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10203 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10204 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10205 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10208 *cost
+= extra_cost
->mult
[1].extend_add
;
10209 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10210 ZERO_EXTEND
, 0, speed_p
)
10211 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
10212 ZERO_EXTEND
, 0, speed_p
)
10213 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10217 *cost
+= COSTS_N_INSNS (1);
10219 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10220 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10223 *cost
+= (extra_cost
->alu
.arith
10224 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10225 ? extra_cost
->alu
.arith
10226 : extra_cost
->alu
.arith_shift
));
10228 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10230 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10235 *cost
+= 2 * extra_cost
->alu
.arith
;
10240 *cost
= LIBCALL_COST (2);
10243 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10246 *cost
+= extra_cost
->alu
.rev
;
10250 /* Fall through. */
10251 case AND
: case XOR
:
10252 if (mode
== SImode
)
10254 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10255 rtx op0
= XEXP (x
, 0);
10256 rtx shift_op
, shift_reg
;
10260 || (code
== IOR
&& TARGET_THUMB2
)))
10261 op0
= XEXP (op0
, 0);
10264 shift_op
= shifter_op_p (op0
, &shift_reg
);
10265 if (shift_op
!= NULL
)
10270 *cost
+= extra_cost
->alu
.log_shift_reg
;
10271 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10274 *cost
+= extra_cost
->alu
.log_shift
;
10276 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10277 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10281 if (CONST_INT_P (XEXP (x
, 1)))
10283 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10284 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10287 *cost
= COSTS_N_INSNS (insns
);
10289 *cost
+= insns
* extra_cost
->alu
.logical
;
10290 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
10295 *cost
+= extra_cost
->alu
.logical
;
10296 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
10297 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10301 if (mode
== DImode
)
10303 rtx op0
= XEXP (x
, 0);
10304 enum rtx_code subcode
= GET_CODE (op0
);
10306 *cost
+= COSTS_N_INSNS (1);
10310 || (code
== IOR
&& TARGET_THUMB2
)))
10311 op0
= XEXP (op0
, 0);
10313 if (GET_CODE (op0
) == ZERO_EXTEND
)
10316 *cost
+= 2 * extra_cost
->alu
.logical
;
10318 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
10320 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10323 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10326 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10328 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
10330 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10335 *cost
+= 2 * extra_cost
->alu
.logical
;
10341 *cost
= LIBCALL_COST (2);
10345 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10346 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10348 rtx op0
= XEXP (x
, 0);
10350 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10351 op0
= XEXP (op0
, 0);
10354 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10356 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10357 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10360 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10362 *cost
= LIBCALL_COST (2);
10366 if (mode
== SImode
)
10368 if (TARGET_DSP_MULTIPLY
10369 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10370 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10371 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10372 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10373 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10374 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10375 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10376 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10377 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10378 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10379 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10380 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10383 /* SMUL[TB][TB]. */
10385 *cost
+= extra_cost
->mult
[0].extend
;
10386 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10387 SIGN_EXTEND
, 0, speed_p
);
10388 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10389 SIGN_EXTEND
, 1, speed_p
);
10393 *cost
+= extra_cost
->mult
[0].simple
;
10397 if (mode
== DImode
)
10400 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10401 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10402 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10403 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10406 *cost
+= extra_cost
->mult
[1].extend
;
10407 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10408 ZERO_EXTEND
, 0, speed_p
)
10409 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10410 ZERO_EXTEND
, 0, speed_p
));
10414 *cost
= LIBCALL_COST (2);
10419 *cost
= LIBCALL_COST (2);
10423 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10424 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10426 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10429 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10434 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10438 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10440 *cost
= LIBCALL_COST (1);
10444 if (mode
== SImode
)
10446 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10448 *cost
+= COSTS_N_INSNS (1);
10449 /* Assume the non-flag-changing variant. */
10451 *cost
+= (extra_cost
->alu
.log_shift
10452 + extra_cost
->alu
.arith_shift
);
10453 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10457 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10458 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10460 *cost
+= COSTS_N_INSNS (1);
10461 /* No extra cost for MOV imm and MVN imm. */
10462 /* If the comparison op is using the flags, there's no further
10463 cost, otherwise we need to add the cost of the comparison. */
10464 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10465 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10466 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10468 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10469 *cost
+= (COSTS_N_INSNS (1)
10470 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10472 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10475 *cost
+= extra_cost
->alu
.arith
;
10481 *cost
+= extra_cost
->alu
.arith
;
10485 if (GET_MODE_CLASS (mode
) == MODE_INT
10486 && GET_MODE_SIZE (mode
) < 4)
10488 /* Slightly disparage, as we might need an extend operation. */
10491 *cost
+= extra_cost
->alu
.arith
;
10495 if (mode
== DImode
)
10497 *cost
+= COSTS_N_INSNS (1);
10499 *cost
+= 2 * extra_cost
->alu
.arith
;
10504 *cost
= LIBCALL_COST (1);
10508 if (mode
== SImode
)
10511 rtx shift_reg
= NULL
;
10513 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10517 if (shift_reg
!= NULL
)
10520 *cost
+= extra_cost
->alu
.log_shift_reg
;
10521 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10524 *cost
+= extra_cost
->alu
.log_shift
;
10525 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10530 *cost
+= extra_cost
->alu
.logical
;
10533 if (mode
== DImode
)
10535 *cost
+= COSTS_N_INSNS (1);
10541 *cost
+= LIBCALL_COST (1);
10546 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10548 *cost
+= COSTS_N_INSNS (3);
10551 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10552 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10554 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10555 /* Assume that if one arm of the if_then_else is a register,
10556 that it will be tied with the result and eliminate the
10557 conditional insn. */
10558 if (REG_P (XEXP (x
, 1)))
10560 else if (REG_P (XEXP (x
, 2)))
10566 if (extra_cost
->alu
.non_exec_costs_exec
)
10567 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10569 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10572 *cost
+= op1cost
+ op2cost
;
10578 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10582 machine_mode op0mode
;
10583 /* We'll mostly assume that the cost of a compare is the cost of the
10584 LHS. However, there are some notable exceptions. */
10586 /* Floating point compares are never done as side-effects. */
10587 op0mode
= GET_MODE (XEXP (x
, 0));
10588 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10589 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10592 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10594 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10596 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10602 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10604 *cost
= LIBCALL_COST (2);
10608 /* DImode compares normally take two insns. */
10609 if (op0mode
== DImode
)
10611 *cost
+= COSTS_N_INSNS (1);
10613 *cost
+= 2 * extra_cost
->alu
.arith
;
10617 if (op0mode
== SImode
)
10622 if (XEXP (x
, 1) == const0_rtx
10623 && !(REG_P (XEXP (x
, 0))
10624 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10625 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10627 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10629 /* Multiply operations that set the flags are often
10630 significantly more expensive. */
10632 && GET_CODE (XEXP (x
, 0)) == MULT
10633 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10634 *cost
+= extra_cost
->mult
[0].flag_setting
;
10637 && GET_CODE (XEXP (x
, 0)) == PLUS
10638 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10639 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10641 *cost
+= extra_cost
->mult
[0].flag_setting
;
10646 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10647 if (shift_op
!= NULL
)
10649 if (shift_reg
!= NULL
)
10651 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10654 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10657 *cost
+= extra_cost
->alu
.arith_shift
;
10658 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10659 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10664 *cost
+= extra_cost
->alu
.arith
;
10665 if (CONST_INT_P (XEXP (x
, 1))
10666 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10668 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10676 *cost
= LIBCALL_COST (2);
10699 if (outer_code
== SET
)
10701 /* Is it a store-flag operation? */
10702 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10703 && XEXP (x
, 1) == const0_rtx
)
10705 /* Thumb also needs an IT insn. */
10706 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10709 if (XEXP (x
, 1) == const0_rtx
)
10714 /* LSR Rd, Rn, #31. */
10716 *cost
+= extra_cost
->alu
.shift
;
10726 *cost
+= COSTS_N_INSNS (1);
10730 /* RSBS T1, Rn, Rn, LSR #31
10732 *cost
+= COSTS_N_INSNS (1);
10734 *cost
+= extra_cost
->alu
.arith_shift
;
10738 /* RSB Rd, Rn, Rn, ASR #1
10739 LSR Rd, Rd, #31. */
10740 *cost
+= COSTS_N_INSNS (1);
10742 *cost
+= (extra_cost
->alu
.arith_shift
10743 + extra_cost
->alu
.shift
);
10749 *cost
+= COSTS_N_INSNS (1);
10751 *cost
+= extra_cost
->alu
.shift
;
10755 /* Remaining cases are either meaningless or would take
10756 three insns anyway. */
10757 *cost
= COSTS_N_INSNS (3);
10760 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10765 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10766 if (CONST_INT_P (XEXP (x
, 1))
10767 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10769 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10776 /* Not directly inside a set. If it involves the condition code
10777 register it must be the condition for a branch, cond_exec or
10778 I_T_E operation. Since the comparison is performed elsewhere
10779 this is just the control part which has no additional
10781 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10782 && XEXP (x
, 1) == const0_rtx
)
10790 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10791 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10794 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10798 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10800 *cost
= LIBCALL_COST (1);
10804 if (mode
== SImode
)
10807 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10811 *cost
= LIBCALL_COST (1);
10815 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10816 && MEM_P (XEXP (x
, 0)))
10818 if (mode
== DImode
)
10819 *cost
+= COSTS_N_INSNS (1);
10824 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10825 *cost
+= extra_cost
->ldst
.load
;
10827 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10829 if (mode
== DImode
)
10830 *cost
+= extra_cost
->alu
.shift
;
10835 /* Widening from less than 32-bits requires an extend operation. */
10836 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10838 /* We have SXTB/SXTH. */
10839 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10841 *cost
+= extra_cost
->alu
.extend
;
10843 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10845 /* Needs two shifts. */
10846 *cost
+= COSTS_N_INSNS (1);
10847 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10849 *cost
+= 2 * extra_cost
->alu
.shift
;
10852 /* Widening beyond 32-bits requires one more insn. */
10853 if (mode
== DImode
)
10855 *cost
+= COSTS_N_INSNS (1);
10857 *cost
+= extra_cost
->alu
.shift
;
10864 || GET_MODE (XEXP (x
, 0)) == SImode
10865 || GET_MODE (XEXP (x
, 0)) == QImode
)
10866 && MEM_P (XEXP (x
, 0)))
10868 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10870 if (mode
== DImode
)
10871 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10876 /* Widening from less than 32-bits requires an extend operation. */
10877 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10879 /* UXTB can be a shorter instruction in Thumb2, but it might
10880 be slower than the AND Rd, Rn, #255 alternative. When
10881 optimizing for speed it should never be slower to use
10882 AND, and we don't really model 16-bit vs 32-bit insns
10885 *cost
+= extra_cost
->alu
.logical
;
10887 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10889 /* We have UXTB/UXTH. */
10890 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10892 *cost
+= extra_cost
->alu
.extend
;
10894 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10896 /* Needs two shifts. It's marginally preferable to use
10897 shifts rather than two BIC instructions as the second
10898 shift may merge with a subsequent insn as a shifter
10900 *cost
= COSTS_N_INSNS (2);
10901 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10903 *cost
+= 2 * extra_cost
->alu
.shift
;
10906 /* Widening beyond 32-bits requires one more insn. */
10907 if (mode
== DImode
)
10909 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10916 /* CONST_INT has no mode, so we cannot tell for sure how many
10917 insns are really going to be needed. The best we can do is
10918 look at the value passed. If it fits in SImode, then assume
10919 that's the mode it will be used for. Otherwise assume it
10920 will be used in DImode. */
10921 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10926 /* Avoid blowing up in arm_gen_constant (). */
10927 if (!(outer_code
== PLUS
10928 || outer_code
== AND
10929 || outer_code
== IOR
10930 || outer_code
== XOR
10931 || outer_code
== MINUS
))
10935 if (mode
== SImode
)
10937 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10938 INTVAL (x
), NULL
, NULL
,
10944 *cost
+= COSTS_N_INSNS (arm_gen_constant
10945 (outer_code
, SImode
, NULL
,
10946 trunc_int_for_mode (INTVAL (x
), SImode
),
10948 + arm_gen_constant (outer_code
, SImode
, NULL
,
10949 INTVAL (x
) >> 32, NULL
,
10961 if (arm_arch_thumb2
&& !flag_pic
)
10962 *cost
+= COSTS_N_INSNS (1);
10964 *cost
+= extra_cost
->ldst
.load
;
10967 *cost
+= COSTS_N_INSNS (1);
10971 *cost
+= COSTS_N_INSNS (1);
10973 *cost
+= extra_cost
->alu
.arith
;
10979 *cost
= COSTS_N_INSNS (4);
10984 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10985 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10987 if (vfp3_const_double_rtx (x
))
10990 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10996 if (mode
== DFmode
)
10997 *cost
+= extra_cost
->ldst
.loadd
;
10999 *cost
+= extra_cost
->ldst
.loadf
;
11002 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
11006 *cost
= COSTS_N_INSNS (4);
11012 && TARGET_HARD_FLOAT
11013 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
11014 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
11015 *cost
= COSTS_N_INSNS (1);
11017 *cost
= COSTS_N_INSNS (4);
11022 /* When optimizing for size, we prefer constant pool entries to
11023 MOVW/MOVT pairs, so bump the cost of these slightly. */
11030 *cost
+= extra_cost
->alu
.clz
;
11034 if (XEXP (x
, 1) == const0_rtx
)
11037 *cost
+= extra_cost
->alu
.log_shift
;
11038 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11041 /* Fall through. */
11045 *cost
+= COSTS_N_INSNS (1);
11049 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
11050 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11051 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
11052 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11053 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
11054 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
11055 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
11056 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11060 *cost
+= extra_cost
->mult
[1].extend
;
11061 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
11062 ZERO_EXTEND
, 0, speed_p
)
11063 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
11064 ZERO_EXTEND
, 0, speed_p
));
11067 *cost
= LIBCALL_COST (1);
11070 case UNSPEC_VOLATILE
:
11072 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
11075 /* Reading the PC is like reading any other register. Writing it
11076 is more expensive, but we take that into account elsewhere. */
11081 /* TODO: Simple zero_extract of bottom bits using AND. */
11082 /* Fall through. */
11086 && CONST_INT_P (XEXP (x
, 1))
11087 && CONST_INT_P (XEXP (x
, 2)))
11090 *cost
+= extra_cost
->alu
.bfx
;
11091 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11094 /* Without UBFX/SBFX, need to resort to shift operations. */
11095 *cost
+= COSTS_N_INSNS (1);
11097 *cost
+= 2 * extra_cost
->alu
.shift
;
11098 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
11102 if (TARGET_HARD_FLOAT
)
11105 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
11106 if (!TARGET_FPU_ARMV8
11107 && GET_MODE (XEXP (x
, 0)) == HFmode
)
11109 /* Pre v8, widening HF->DF is a two-step process, first
11110 widening to SFmode. */
11111 *cost
+= COSTS_N_INSNS (1);
11113 *cost
+= extra_cost
->fp
[0].widen
;
11115 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11119 *cost
= LIBCALL_COST (1);
11122 case FLOAT_TRUNCATE
:
11123 if (TARGET_HARD_FLOAT
)
11126 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
11127 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11129 /* Vector modes? */
11131 *cost
= LIBCALL_COST (1);
11135 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
11137 rtx op0
= XEXP (x
, 0);
11138 rtx op1
= XEXP (x
, 1);
11139 rtx op2
= XEXP (x
, 2);
11142 /* vfms or vfnma. */
11143 if (GET_CODE (op0
) == NEG
)
11144 op0
= XEXP (op0
, 0);
11146 /* vfnms or vfnma. */
11147 if (GET_CODE (op2
) == NEG
)
11148 op2
= XEXP (op2
, 0);
11150 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
11151 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
11152 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
11155 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
11160 *cost
= LIBCALL_COST (3);
11165 if (TARGET_HARD_FLOAT
)
11167 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11168 a vcvt fixed-point conversion. */
11169 if (code
== FIX
&& mode
== SImode
11170 && GET_CODE (XEXP (x
, 0)) == FIX
11171 && GET_MODE (XEXP (x
, 0)) == SFmode
11172 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11173 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11177 *cost
+= extra_cost
->fp
[0].toint
;
11179 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
11184 if (GET_MODE_CLASS (mode
) == MODE_INT
)
11186 mode
= GET_MODE (XEXP (x
, 0));
11188 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
11189 /* Strip of the 'cost' of rounding towards zero. */
11190 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11191 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
11194 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11195 /* ??? Increase the cost to deal with transferring from
11196 FP -> CORE registers? */
11199 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11200 && TARGET_FPU_ARMV8
)
11203 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11206 /* Vector costs? */
11208 *cost
= LIBCALL_COST (1);
11212 case UNSIGNED_FLOAT
:
11213 if (TARGET_HARD_FLOAT
)
11215 /* ??? Increase the cost to deal with transferring from CORE
11216 -> FP registers? */
11218 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11221 *cost
= LIBCALL_COST (1);
11229 /* Just a guess. Guess number of instructions in the asm
11230 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11231 though (see PR60663). */
11232 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11233 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11235 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11239 if (mode
!= VOIDmode
)
11240 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11242 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11247 #undef HANDLE_NARROW_SHIFT_ARITH
11249 /* RTX costs when optimizing for size. */
11251 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
11252 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
11255 int code
= GET_CODE (x
);
11257 if (TARGET_OLD_RTX_COSTS
11258 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
11260 /* Old way. (Deprecated.) */
11262 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
11263 (enum rtx_code
) outer_code
, total
);
11265 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
11266 (enum rtx_code
) outer_code
, total
,
11272 if (current_tune
->insn_extra_cost
)
11273 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11274 (enum rtx_code
) outer_code
,
11275 current_tune
->insn_extra_cost
,
11277 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11278 && current_tune->insn_extra_cost != NULL */
11280 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11281 (enum rtx_code
) outer_code
,
11282 &generic_extra_costs
, total
, speed
);
11285 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
11287 print_rtl_single (dump_file
, x
);
11288 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11289 *total
, result
? "final" : "partial");
11294 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11295 supported on any "slowmul" cores, so it can be ignored. */
11298 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11299 int *total
, bool speed
)
11301 machine_mode mode
= GET_MODE (x
);
11305 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11312 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11315 *total
= COSTS_N_INSNS (20);
11319 if (CONST_INT_P (XEXP (x
, 1)))
11321 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11322 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11323 int cost
, const_ok
= const_ok_for_arm (i
);
11324 int j
, booth_unit_size
;
11326 /* Tune as appropriate. */
11327 cost
= const_ok
? 4 : 8;
11328 booth_unit_size
= 2;
11329 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11331 i
>>= booth_unit_size
;
11335 *total
= COSTS_N_INSNS (cost
);
11336 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
11340 *total
= COSTS_N_INSNS (20);
11344 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
11349 /* RTX cost for cores with a fast multiply unit (M variants). */
11352 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11353 int *total
, bool speed
)
11355 machine_mode mode
= GET_MODE (x
);
11359 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11363 /* ??? should thumb2 use different costs? */
11367 /* There is no point basing this on the tuning, since it is always the
11368 fast variant if it exists at all. */
11370 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11371 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11372 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11374 *total
= COSTS_N_INSNS(2);
11379 if (mode
== DImode
)
11381 *total
= COSTS_N_INSNS (5);
11385 if (CONST_INT_P (XEXP (x
, 1)))
11387 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11388 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11389 int cost
, const_ok
= const_ok_for_arm (i
);
11390 int j
, booth_unit_size
;
11392 /* Tune as appropriate. */
11393 cost
= const_ok
? 4 : 8;
11394 booth_unit_size
= 8;
11395 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11397 i
>>= booth_unit_size
;
11401 *total
= COSTS_N_INSNS(cost
);
11405 if (mode
== SImode
)
11407 *total
= COSTS_N_INSNS (4);
11411 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11413 if (TARGET_HARD_FLOAT
11415 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11417 *total
= COSTS_N_INSNS (1);
11422 /* Requires a lib call */
11423 *total
= COSTS_N_INSNS (20);
11427 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11432 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11433 so it can be ignored. */
11436 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11437 int *total
, bool speed
)
11439 machine_mode mode
= GET_MODE (x
);
11443 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11450 if (GET_CODE (XEXP (x
, 0)) != MULT
)
11451 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11453 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11454 will stall until the multiplication is complete. */
11455 *total
= COSTS_N_INSNS (3);
11459 /* There is no point basing this on the tuning, since it is always the
11460 fast variant if it exists at all. */
11462 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11463 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11464 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11466 *total
= COSTS_N_INSNS (2);
11471 if (mode
== DImode
)
11473 *total
= COSTS_N_INSNS (5);
11477 if (CONST_INT_P (XEXP (x
, 1)))
11479 /* If operand 1 is a constant we can more accurately
11480 calculate the cost of the multiply. The multiplier can
11481 retire 15 bits on the first cycle and a further 12 on the
11482 second. We do, of course, have to load the constant into
11483 a register first. */
11484 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
11485 /* There's a general overhead of one cycle. */
11487 unsigned HOST_WIDE_INT masked_const
;
11489 if (i
& 0x80000000)
11492 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
11494 masked_const
= i
& 0xffff8000;
11495 if (masked_const
!= 0)
11498 masked_const
= i
& 0xf8000000;
11499 if (masked_const
!= 0)
11502 *total
= COSTS_N_INSNS (cost
);
11506 if (mode
== SImode
)
11508 *total
= COSTS_N_INSNS (3);
11512 /* Requires a lib call */
11513 *total
= COSTS_N_INSNS (20);
11517 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11522 /* RTX costs for 9e (and later) cores. */
11525 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11526 int *total
, bool speed
)
11528 machine_mode mode
= GET_MODE (x
);
11535 /* Small multiply: 32 cycles for an integer multiply inst. */
11536 if (arm_arch6m
&& arm_m_profile_small_mul
)
11537 *total
= COSTS_N_INSNS (32);
11539 *total
= COSTS_N_INSNS (3);
11543 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11551 /* There is no point basing this on the tuning, since it is always the
11552 fast variant if it exists at all. */
11554 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11555 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11556 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11558 *total
= COSTS_N_INSNS (2);
11563 if (mode
== DImode
)
11565 *total
= COSTS_N_INSNS (5);
11569 if (mode
== SImode
)
11571 *total
= COSTS_N_INSNS (2);
11575 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11577 if (TARGET_HARD_FLOAT
11579 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11581 *total
= COSTS_N_INSNS (1);
11586 *total
= COSTS_N_INSNS (20);
11590 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11593 /* All address computations that can be done are free, but rtx cost returns
11594 the same for practically all of them. So we weight the different types
11595 of address here in the order (most pref first):
11596 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11598 arm_arm_address_cost (rtx x
)
11600 enum rtx_code c
= GET_CODE (x
);
11602 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
11604 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
11609 if (CONST_INT_P (XEXP (x
, 1)))
11612 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
11622 arm_thumb_address_cost (rtx x
)
11624 enum rtx_code c
= GET_CODE (x
);
11629 && REG_P (XEXP (x
, 0))
11630 && CONST_INT_P (XEXP (x
, 1)))
11637 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11638 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11640 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11643 /* Adjust cost hook for XScale. */
11645 xscale_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11647 /* Some true dependencies can have a higher cost depending
11648 on precisely how certain input operands are used. */
11649 if (REG_NOTE_KIND(link
) == 0
11650 && recog_memoized (insn
) >= 0
11651 && recog_memoized (dep
) >= 0)
11653 int shift_opnum
= get_attr_shift (insn
);
11654 enum attr_type attr_type
= get_attr_type (dep
);
11656 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11657 operand for INSN. If we have a shifted input operand and the
11658 instruction we depend on is another ALU instruction, then we may
11659 have to account for an additional stall. */
11660 if (shift_opnum
!= 0
11661 && (attr_type
== TYPE_ALU_SHIFT_IMM
11662 || attr_type
== TYPE_ALUS_SHIFT_IMM
11663 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11664 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11665 || attr_type
== TYPE_ALU_SHIFT_REG
11666 || attr_type
== TYPE_ALUS_SHIFT_REG
11667 || attr_type
== TYPE_LOGIC_SHIFT_REG
11668 || attr_type
== TYPE_LOGICS_SHIFT_REG
11669 || attr_type
== TYPE_MOV_SHIFT
11670 || attr_type
== TYPE_MVN_SHIFT
11671 || attr_type
== TYPE_MOV_SHIFT_REG
11672 || attr_type
== TYPE_MVN_SHIFT_REG
))
11674 rtx shifted_operand
;
11677 /* Get the shifted operand. */
11678 extract_insn (insn
);
11679 shifted_operand
= recog_data
.operand
[shift_opnum
];
11681 /* Iterate over all the operands in DEP. If we write an operand
11682 that overlaps with SHIFTED_OPERAND, then we have increase the
11683 cost of this dependency. */
11684 extract_insn (dep
);
11685 preprocess_constraints (dep
);
11686 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11688 /* We can ignore strict inputs. */
11689 if (recog_data
.operand_type
[opno
] == OP_IN
)
11692 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11704 /* Adjust cost hook for Cortex A9. */
11706 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11708 switch (REG_NOTE_KIND (link
))
11715 case REG_DEP_OUTPUT
:
11716 if (recog_memoized (insn
) >= 0
11717 && recog_memoized (dep
) >= 0)
11719 if (GET_CODE (PATTERN (insn
)) == SET
)
11722 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11724 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11726 enum attr_type attr_type_insn
= get_attr_type (insn
);
11727 enum attr_type attr_type_dep
= get_attr_type (dep
);
11729 /* By default all dependencies of the form
11732 have an extra latency of 1 cycle because
11733 of the input and output dependency in this
11734 case. However this gets modeled as an true
11735 dependency and hence all these checks. */
11736 if (REG_P (SET_DEST (PATTERN (insn
)))
11737 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
11739 /* FMACS is a special case where the dependent
11740 instruction can be issued 3 cycles before
11741 the normal latency in case of an output
11743 if ((attr_type_insn
== TYPE_FMACS
11744 || attr_type_insn
== TYPE_FMACD
)
11745 && (attr_type_dep
== TYPE_FMACS
11746 || attr_type_dep
== TYPE_FMACD
))
11748 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11749 *cost
= insn_default_latency (dep
) - 3;
11751 *cost
= insn_default_latency (dep
);
11756 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11757 *cost
= insn_default_latency (dep
) + 1;
11759 *cost
= insn_default_latency (dep
);
11769 gcc_unreachable ();
11775 /* Adjust cost hook for FA726TE. */
11777 fa726te_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11779 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11780 have penalty of 3. */
11781 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11782 && recog_memoized (insn
) >= 0
11783 && recog_memoized (dep
) >= 0
11784 && get_attr_conds (dep
) == CONDS_SET
)
11786 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11787 if (get_attr_conds (insn
) == CONDS_USE
11788 && get_attr_type (insn
) != TYPE_BRANCH
)
11794 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11795 || get_attr_conds (insn
) == CONDS_USE
)
11805 /* Implement TARGET_REGISTER_MOVE_COST.
11807 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11808 it is typically more expensive than a single memory access. We set
11809 the cost to less than two memory accesses so that floating
11810 point to integer conversion does not go through memory. */
11813 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11814 reg_class_t from
, reg_class_t to
)
11818 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11819 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11821 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11822 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11824 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11831 if (from
== HI_REGS
|| to
== HI_REGS
)
11838 /* Implement TARGET_MEMORY_MOVE_COST. */
11841 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11842 bool in ATTRIBUTE_UNUSED
)
11848 if (GET_MODE_SIZE (mode
) < 4)
11851 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11855 /* Vectorizer cost model implementation. */
11857 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11859 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11861 int misalign ATTRIBUTE_UNUSED
)
11865 switch (type_of_cost
)
11868 return current_tune
->vec_costs
->scalar_stmt_cost
;
11871 return current_tune
->vec_costs
->scalar_load_cost
;
11874 return current_tune
->vec_costs
->scalar_store_cost
;
11877 return current_tune
->vec_costs
->vec_stmt_cost
;
11880 return current_tune
->vec_costs
->vec_align_load_cost
;
11883 return current_tune
->vec_costs
->vec_store_cost
;
11885 case vec_to_scalar
:
11886 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11888 case scalar_to_vec
:
11889 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11891 case unaligned_load
:
11892 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11894 case unaligned_store
:
11895 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11897 case cond_branch_taken
:
11898 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11900 case cond_branch_not_taken
:
11901 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11904 case vec_promote_demote
:
11905 return current_tune
->vec_costs
->vec_stmt_cost
;
11907 case vec_construct
:
11908 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11909 return elements
/ 2 + 1;
11912 gcc_unreachable ();
11916 /* Implement targetm.vectorize.add_stmt_cost. */
11919 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11920 struct _stmt_vec_info
*stmt_info
, int misalign
,
11921 enum vect_cost_model_location where
)
11923 unsigned *cost
= (unsigned *) data
;
11924 unsigned retval
= 0;
11926 if (flag_vect_cost_model
)
11928 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11929 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11931 /* Statements in an inner loop relative to the loop being
11932 vectorized are weighted more heavily. The value here is
11933 arbitrary and could potentially be improved with analysis. */
11934 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11935 count
*= 50; /* FIXME. */
11937 retval
= (unsigned) (count
* stmt_cost
);
11938 cost
[where
] += retval
;
11944 /* Return true if and only if this insn can dual-issue only as older. */
11946 cortexa7_older_only (rtx_insn
*insn
)
11948 if (recog_memoized (insn
) < 0)
11951 switch (get_attr_type (insn
))
11953 case TYPE_ALU_DSP_REG
:
11954 case TYPE_ALU_SREG
:
11955 case TYPE_ALUS_SREG
:
11956 case TYPE_LOGIC_REG
:
11957 case TYPE_LOGICS_REG
:
11959 case TYPE_ADCS_REG
:
11964 case TYPE_SHIFT_IMM
:
11965 case TYPE_SHIFT_REG
:
11966 case TYPE_LOAD_BYTE
:
11969 case TYPE_FFARITHS
:
11971 case TYPE_FFARITHD
:
11989 case TYPE_F_STORES
:
11996 /* Return true if and only if this insn can dual-issue as younger. */
11998 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
12000 if (recog_memoized (insn
) < 0)
12003 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
12007 switch (get_attr_type (insn
))
12010 case TYPE_ALUS_IMM
:
12011 case TYPE_LOGIC_IMM
:
12012 case TYPE_LOGICS_IMM
:
12017 case TYPE_MOV_SHIFT
:
12018 case TYPE_MOV_SHIFT_REG
:
12028 /* Look for an instruction that can dual issue only as an older
12029 instruction, and move it in front of any instructions that can
12030 dual-issue as younger, while preserving the relative order of all
12031 other instructions in the ready list. This is a hueuristic to help
12032 dual-issue in later cycles, by postponing issue of more flexible
12033 instructions. This heuristic may affect dual issue opportunities
12034 in the current cycle. */
12036 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
12037 int *n_readyp
, int clock
)
12040 int first_older_only
= -1, first_younger
= -1;
12044 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12048 /* Traverse the ready list from the head (the instruction to issue
12049 first), and looking for the first instruction that can issue as
12050 younger and the first instruction that can dual-issue only as
12052 for (i
= *n_readyp
- 1; i
>= 0; i
--)
12054 rtx_insn
*insn
= ready
[i
];
12055 if (cortexa7_older_only (insn
))
12057 first_older_only
= i
;
12059 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
12062 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
12066 /* Nothing to reorder because either no younger insn found or insn
12067 that can dual-issue only as older appears before any insn that
12068 can dual-issue as younger. */
12069 if (first_younger
== -1)
12072 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
12076 /* Nothing to reorder because no older-only insn in the ready list. */
12077 if (first_older_only
== -1)
12080 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
12084 /* Move first_older_only insn before first_younger. */
12086 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
12087 INSN_UID(ready
[first_older_only
]),
12088 INSN_UID(ready
[first_younger
]));
12089 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
12090 for (i
= first_older_only
; i
< first_younger
; i
++)
12092 ready
[i
] = ready
[i
+1];
12095 ready
[i
] = first_older_only_insn
;
12099 /* Implement TARGET_SCHED_REORDER. */
12101 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
12107 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
12110 /* Do nothing for other cores. */
12114 return arm_issue_rate ();
12117 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12118 It corrects the value of COST based on the relationship between
12119 INSN and DEP through the dependence LINK. It returns the new
12120 value. There is a per-core adjust_cost hook to adjust scheduler costs
12121 and the per-core hook can choose to completely override the generic
12122 adjust_cost function. Only put bits of code into arm_adjust_cost that
12123 are common across all cores. */
12125 arm_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int cost
)
12129 /* When generating Thumb-1 code, we want to place flag-setting operations
12130 close to a conditional branch which depends on them, so that we can
12131 omit the comparison. */
12133 && REG_NOTE_KIND (link
) == 0
12134 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
12135 && recog_memoized (dep
) >= 0
12136 && get_attr_conds (dep
) == CONDS_SET
)
12139 if (current_tune
->sched_adjust_cost
!= NULL
)
12141 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
12145 /* XXX Is this strictly true? */
12146 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
12147 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
12150 /* Call insns don't incur a stall, even if they follow a load. */
12151 if (REG_NOTE_KIND (link
) == 0
12155 if ((i_pat
= single_set (insn
)) != NULL
12156 && MEM_P (SET_SRC (i_pat
))
12157 && (d_pat
= single_set (dep
)) != NULL
12158 && MEM_P (SET_DEST (d_pat
)))
12160 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
12161 /* This is a load after a store, there is no conflict if the load reads
12162 from a cached area. Assume that loads from the stack, and from the
12163 constant pool are cached, and that others will miss. This is a
12166 if ((GET_CODE (src_mem
) == SYMBOL_REF
12167 && CONSTANT_POOL_ADDRESS_P (src_mem
))
12168 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
12169 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
12170 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
12178 arm_max_conditional_execute (void)
12180 return max_insns_skipped
;
12184 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12187 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12189 return (optimize
> 0) ? 2 : 0;
12193 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12195 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12198 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12199 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12200 sequences of non-executed instructions in IT blocks probably take the same
12201 amount of time as executed instructions (and the IT instruction itself takes
12202 space in icache). This function was experimentally determined to give good
12203 results on a popular embedded benchmark. */
12206 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
12208 return (TARGET_32BIT
&& speed_p
) ? 1
12209 : arm_default_branch_cost (speed_p
, predictable_p
);
12213 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
12215 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12218 static bool fp_consts_inited
= false;
12220 static REAL_VALUE_TYPE value_fp0
;
12223 init_fp_table (void)
12227 r
= REAL_VALUE_ATOF ("0", DFmode
);
12229 fp_consts_inited
= true;
12232 /* Return TRUE if rtx X is a valid immediate FP constant. */
12234 arm_const_double_rtx (rtx x
)
12236 const REAL_VALUE_TYPE
*r
;
12238 if (!fp_consts_inited
)
12241 r
= CONST_DOUBLE_REAL_VALUE (x
);
12242 if (REAL_VALUE_MINUS_ZERO (*r
))
12245 if (real_equal (r
, &value_fp0
))
12251 /* VFPv3 has a fairly wide range of representable immediates, formed from
12252 "quarter-precision" floating-point values. These can be evaluated using this
12253 formula (with ^ for exponentiation):
12257 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12258 16 <= n <= 31 and 0 <= r <= 7.
12260 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12262 - A (most-significant) is the sign bit.
12263 - BCD are the exponent (encoded as r XOR 3).
12264 - EFGH are the mantissa (encoded as n - 16).
12267 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12268 fconst[sd] instruction, or -1 if X isn't suitable. */
12270 vfp3_const_double_index (rtx x
)
12272 REAL_VALUE_TYPE r
, m
;
12273 int sign
, exponent
;
12274 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12275 unsigned HOST_WIDE_INT mask
;
12276 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12279 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12282 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12284 /* We can't represent these things, so detect them first. */
12285 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12288 /* Extract sign, exponent and mantissa. */
12289 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12290 r
= real_value_abs (&r
);
12291 exponent
= REAL_EXP (&r
);
12292 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12293 highest (sign) bit, with a fixed binary point at bit point_pos.
12294 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12295 bits for the mantissa, this may fail (low bits would be lost). */
12296 real_ldexp (&m
, &r
, point_pos
- exponent
);
12297 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12298 mantissa
= w
.elt (0);
12299 mant_hi
= w
.elt (1);
12301 /* If there are bits set in the low part of the mantissa, we can't
12302 represent this value. */
12306 /* Now make it so that mantissa contains the most-significant bits, and move
12307 the point_pos to indicate that the least-significant bits have been
12309 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12310 mantissa
= mant_hi
;
12312 /* We can permit four significant bits of mantissa only, plus a high bit
12313 which is always 1. */
12314 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
12315 if ((mantissa
& mask
) != 0)
12318 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12319 mantissa
>>= point_pos
- 5;
12321 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12322 floating-point immediate zero with Neon using an integer-zero load, but
12323 that case is handled elsewhere.) */
12327 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12329 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12330 normalized significands are in the range [1, 2). (Our mantissa is shifted
12331 left 4 places at this point relative to normalized IEEE754 values). GCC
12332 internally uses [0.5, 1) (see real.c), so the exponent returned from
12333 REAL_EXP must be altered. */
12334 exponent
= 5 - exponent
;
12336 if (exponent
< 0 || exponent
> 7)
12339 /* Sign, mantissa and exponent are now in the correct form to plug into the
12340 formula described in the comment above. */
12341 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12344 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12346 vfp3_const_double_rtx (rtx x
)
12351 return vfp3_const_double_index (x
) != -1;
12354 /* Recognize immediates which can be used in various Neon instructions. Legal
12355 immediates are described by the following table (for VMVN variants, the
12356 bitwise inverse of the constant shown is recognized. In either case, VMOV
12357 is output and the correct instruction to use for a given constant is chosen
12358 by the assembler). The constant shown is replicated across all elements of
12359 the destination vector.
12361 insn elems variant constant (binary)
12362 ---- ----- ------- -----------------
12363 vmov i32 0 00000000 00000000 00000000 abcdefgh
12364 vmov i32 1 00000000 00000000 abcdefgh 00000000
12365 vmov i32 2 00000000 abcdefgh 00000000 00000000
12366 vmov i32 3 abcdefgh 00000000 00000000 00000000
12367 vmov i16 4 00000000 abcdefgh
12368 vmov i16 5 abcdefgh 00000000
12369 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12370 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12371 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12372 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12373 vmvn i16 10 00000000 abcdefgh
12374 vmvn i16 11 abcdefgh 00000000
12375 vmov i32 12 00000000 00000000 abcdefgh 11111111
12376 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12377 vmov i32 14 00000000 abcdefgh 11111111 11111111
12378 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12379 vmov i8 16 abcdefgh
12380 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12381 eeeeeeee ffffffff gggggggg hhhhhhhh
12382 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12383 vmov f32 19 00000000 00000000 00000000 00000000
 12385    For case 18, B = !b.  Representable values are exactly those accepted by
 12386    vfp3_const_double_index, but are output as floating-point numbers rather
 12387    than indices.
 12389    For case 19, we will change it to vmov.i32 when assembling.
12391 Variants 0-5 (inclusive) may also be used as immediates for the second
12392 operand of VORR/VBIC instructions.
12394 The INVERSE argument causes the bitwise inverse of the given operand to be
12395 recognized instead (used for recognizing legal immediates for the VAND/VORN
12396 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12397 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12398 output, rather than the real insns vbic/vorr).
12400 INVERSE makes no difference to the recognition of float vectors.
12402 The return value is the variant of immediate as shown in the above table, or
12403 -1 if the given value doesn't match any of the listed patterns.
12406 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12407 rtx
*modconst
, int *elementwidth
)
12409 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12411 for (i = 0; i < idx; i += (STRIDE)) \
12416 immtype = (CLASS); \
12417 elsize = (ELSIZE); \
12421 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12422 unsigned int innersize
;
12423 unsigned char bytes
[16];
12424 int immtype
= -1, matches
;
12425 unsigned int invmask
= inverse
? 0xff : 0;
12426 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12429 n_elts
= CONST_VECTOR_NUNITS (op
);
12433 if (mode
== VOIDmode
)
12437 innersize
= GET_MODE_UNIT_SIZE (mode
);
12439 /* Vectors of float constants. */
12440 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12442 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12443 const REAL_VALUE_TYPE
*r0
;
12445 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12448 /* FP16 vectors cannot be represented. */
12449 if (GET_MODE_INNER (mode
) == HFmode
)
12452 r0
= CONST_DOUBLE_REAL_VALUE (el0
);
12454 for (i
= 1; i
< n_elts
; i
++)
12456 rtx elt
= CONST_VECTOR_ELT (op
, i
);
12457 if (!real_equal (r0
, CONST_DOUBLE_REAL_VALUE (elt
)))
12462 *modconst
= CONST_VECTOR_ELT (op
, 0);
12467 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12473 /* Splat vector constant out into a byte vector. */
12474 for (i
= 0; i
< n_elts
; i
++)
12476 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12477 unsigned HOST_WIDE_INT elpart
;
12479 gcc_assert (CONST_INT_P (el
));
12480 elpart
= INTVAL (el
);
12482 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
12484 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12485 elpart
>>= BITS_PER_UNIT
;
12489 /* Sanity check. */
12490 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12494 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12495 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12497 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12498 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12500 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12501 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12503 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12504 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12506 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12508 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12510 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12511 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12513 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12514 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12516 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12517 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12519 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12520 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12522 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12524 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12526 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12527 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12529 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12530 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12532 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12533 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12535 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12536 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12538 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12540 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12541 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12549 *elementwidth
= elsize
;
12553 unsigned HOST_WIDE_INT imm
= 0;
12555 /* Un-invert bytes of recognized vector, if necessary. */
12557 for (i
= 0; i
< idx
; i
++)
12558 bytes
[i
] ^= invmask
;
12562 /* FIXME: Broken on 32-bit H_W_I hosts. */
12563 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12565 for (i
= 0; i
< 8; i
++)
12566 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12567 << (i
* BITS_PER_UNIT
);
12569 *modconst
= GEN_INT (imm
);
12573 unsigned HOST_WIDE_INT imm
= 0;
12575 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12576 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12578 *modconst
= GEN_INT (imm
);
12586 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12587 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12588 float elements), and a modified constant (whatever should be output for a
12589 VMOV) in *MODCONST. */
12592 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
12593 rtx
*modconst
, int *elementwidth
)
12597 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12603 *modconst
= tmpconst
;
12606 *elementwidth
= tmpwidth
;
12611 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12612 the immediate is valid, write a constant suitable for using as an operand
12613 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12614 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12617 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
12618 rtx
*modconst
, int *elementwidth
)
12622 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12624 if (retval
< 0 || retval
> 5)
12628 *modconst
= tmpconst
;
12631 *elementwidth
= tmpwidth
;
12636 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12637 the immediate is valid, write a constant suitable for using as an operand
12638 to VSHR/VSHL to *MODCONST and the corresponding element width to
12639 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12640 because they have different limitations. */
12643 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12644 rtx
*modconst
, int *elementwidth
,
12647 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
12648 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12649 unsigned HOST_WIDE_INT last_elt
= 0;
12650 unsigned HOST_WIDE_INT maxshift
;
12652 /* Split vector constant out into a byte vector. */
12653 for (i
= 0; i
< n_elts
; i
++)
12655 rtx el
= CONST_VECTOR_ELT (op
, i
);
12656 unsigned HOST_WIDE_INT elpart
;
12658 if (CONST_INT_P (el
))
12659 elpart
= INTVAL (el
);
12660 else if (CONST_DOUBLE_P (el
))
12663 gcc_unreachable ();
12665 if (i
!= 0 && elpart
!= last_elt
)
12671 /* Shift less than element size. */
12672 maxshift
= innersize
* 8;
12676 /* Left shift immediate value can be from 0 to <size>-1. */
12677 if (last_elt
>= maxshift
)
12682 /* Right shift immediate value can be from 1 to <size>. */
12683 if (last_elt
== 0 || last_elt
> maxshift
)
12688 *elementwidth
= innersize
* 8;
12691 *modconst
= CONST_VECTOR_ELT (op
, 0);
12696 /* Return a string suitable for output of Neon immediate logic operation
12700 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12701 int inverse
, int quad
)
12703 int width
, is_valid
;
12704 static char templ
[40];
12706 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12708 gcc_assert (is_valid
!= 0);
12711 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12713 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12718 /* Return a string suitable for output of Neon immediate shift operation
12719 (VSHR or VSHL) MNEM. */
12722 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12723 machine_mode mode
, int quad
,
12726 int width
, is_valid
;
12727 static char templ
[40];
12729 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12730 gcc_assert (is_valid
!= 0);
12733 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12735 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12740 /* Output a sequence of pairwise operations to implement a reduction.
12741 NOTE: We do "too much work" here, because pairwise operations work on two
12742 registers-worth of operands in one go. Unfortunately we can't exploit those
12743 extra calculations to do the full operation in fewer steps, I don't think.
12744 Although all vector elements of the result but the first are ignored, we
12745 actually calculate the same result in each of the elements. An alternative
12746 such as initially loading a vector with zero to use as each of the second
12747 operands would use up an additional register and take an extra instruction,
12748 for no particular gain. */
12751 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12752 rtx (*reduc
) (rtx
, rtx
, rtx
))
12754 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12757 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12759 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12760 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12765 /* If VALS is a vector constant that can be loaded into a register
12766 using VDUP, generate instructions to do so and return an RTX to
12767 assign to the register. Otherwise return NULL_RTX. */
12770 neon_vdup_constant (rtx vals
)
12772 machine_mode mode
= GET_MODE (vals
);
12773 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12776 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12779 if (!const_vec_duplicate_p (vals
, &x
))
12780 /* The elements are not all the same. We could handle repeating
12781 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12782 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12786 /* We can load this constant by using VDUP and a constant in a
12787 single ARM register. This will be cheaper than a vector
12790 x
= copy_to_mode_reg (inner_mode
, x
);
12791 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12794 /* Generate code to load VALS, which is a PARALLEL containing only
12795 constants (for vec_init) or CONST_VECTOR, efficiently into a
12796 register. Returns an RTX to copy into the register, or NULL_RTX
12797 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12800 neon_make_constant (rtx vals
)
12802 machine_mode mode
= GET_MODE (vals
);
12804 rtx const_vec
= NULL_RTX
;
12805 int n_elts
= GET_MODE_NUNITS (mode
);
12809 if (GET_CODE (vals
) == CONST_VECTOR
)
12811 else if (GET_CODE (vals
) == PARALLEL
)
12813 /* A CONST_VECTOR must contain only CONST_INTs and
12814 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12815 Only store valid constants in a CONST_VECTOR. */
12816 for (i
= 0; i
< n_elts
; ++i
)
12818 rtx x
= XVECEXP (vals
, 0, i
);
12819 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12822 if (n_const
== n_elts
)
12823 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12826 gcc_unreachable ();
12828 if (const_vec
!= NULL
12829 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12830 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12832 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12833 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12834 pipeline cycle; creating the constant takes one or two ARM
12835 pipeline cycles. */
12837 else if (const_vec
!= NULL_RTX
)
12838 /* Load from constant pool. On Cortex-A8 this takes two cycles
12839 (for either double or quad vectors). We can not take advantage
12840 of single-cycle VLD1 because we need a PC-relative addressing
12844 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12845 We can not construct an initializer. */
12849 /* Initialize vector TARGET to VALS. */
12852 neon_expand_vector_init (rtx target
, rtx vals
)
12854 machine_mode mode
= GET_MODE (target
);
12855 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12856 int n_elts
= GET_MODE_NUNITS (mode
);
12857 int n_var
= 0, one_var
= -1;
12858 bool all_same
= true;
12862 for (i
= 0; i
< n_elts
; ++i
)
12864 x
= XVECEXP (vals
, 0, i
);
12865 if (!CONSTANT_P (x
))
12866 ++n_var
, one_var
= i
;
12868 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12874 rtx constant
= neon_make_constant (vals
);
12875 if (constant
!= NULL_RTX
)
12877 emit_move_insn (target
, constant
);
12882 /* Splat a single non-constant element if we can. */
12883 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12885 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12886 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
12890 /* One field is non-constant. Load constant then overwrite varying
12891 field. This is more efficient than using the stack. */
12894 rtx copy
= copy_rtx (vals
);
12895 rtx index
= GEN_INT (one_var
);
12897 /* Load constant part of vector, substitute neighboring value for
12898 varying element. */
12899 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12900 neon_expand_vector_init (target
, copy
);
12902 /* Insert variable. */
12903 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12907 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12910 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12913 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12916 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12919 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12922 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12925 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12928 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12931 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12934 gcc_unreachable ();
12939 /* Construct the vector in memory one field at a time
12940 and load the whole vector. */
12941 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12942 for (i
= 0; i
< n_elts
; i
++)
12943 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12944 i
* GET_MODE_SIZE (inner_mode
)),
12945 XVECEXP (vals
, 0, i
));
12946 emit_move_insn (target
, mem
);
12949 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12950 ERR if it doesn't. EXP indicates the source location, which includes the
12951 inlining history for intrinsics. */
12954 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12955 const_tree exp
, const char *desc
)
12957 HOST_WIDE_INT lane
;
12959 gcc_assert (CONST_INT_P (operand
));
12961 lane
= INTVAL (operand
);
12963 if (lane
< low
|| lane
>= high
)
12966 error ("%K%s %wd out of range %wd - %wd",
12967 exp
, desc
, lane
, low
, high
- 1);
12969 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12973 /* Bounds-check lanes. */
12976 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12979 bounds_check (operand
, low
, high
, exp
, "lane");
12982 /* Bounds-check constants. */
12985 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12987 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12991 neon_element_bits (machine_mode mode
)
12993 return GET_MODE_UNIT_BITSIZE (mode
);
12997 /* Predicates for `match_operand' and `match_operator'. */
12999 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13000 WB is true if full writeback address modes are allowed and is false
13001 if limited writeback address modes (POST_INC and PRE_DEC) are
13005 arm_coproc_mem_operand (rtx op
, bool wb
)
13009 /* Reject eliminable registers. */
13010 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
13011 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13012 || reg_mentioned_p (arg_pointer_rtx
, op
)
13013 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13014 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13015 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13016 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13019 /* Constants are converted into offsets from labels. */
13023 ind
= XEXP (op
, 0);
13025 if (reload_completed
13026 && (GET_CODE (ind
) == LABEL_REF
13027 || (GET_CODE (ind
) == CONST
13028 && GET_CODE (XEXP (ind
, 0)) == PLUS
13029 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13030 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13033 /* Match: (mem (reg)). */
13035 return arm_address_register_rtx_p (ind
, 0);
13037 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
13038 acceptable in any case (subject to verification by
13039 arm_address_register_rtx_p). We need WB to be true to accept
13040 PRE_INC and POST_DEC. */
13041 if (GET_CODE (ind
) == POST_INC
13042 || GET_CODE (ind
) == PRE_DEC
13044 && (GET_CODE (ind
) == PRE_INC
13045 || GET_CODE (ind
) == POST_DEC
)))
13046 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13049 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
13050 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
13051 && GET_CODE (XEXP (ind
, 1)) == PLUS
13052 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
13053 ind
= XEXP (ind
, 1);
13058 if (GET_CODE (ind
) == PLUS
13059 && REG_P (XEXP (ind
, 0))
13060 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13061 && CONST_INT_P (XEXP (ind
, 1))
13062 && INTVAL (XEXP (ind
, 1)) > -1024
13063 && INTVAL (XEXP (ind
, 1)) < 1024
13064 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13070 /* Return TRUE if OP is a memory operand which we can load or store a vector
13071 to/from. TYPE is one of the following values:
13072 0 - Vector load/stor (vldr)
13073 1 - Core registers (ldm)
13074 2 - Element/structure loads (vld1)
13077 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
13081 /* Reject eliminable registers. */
13082 if (strict
&& ! (reload_in_progress
|| reload_completed
)
13083 && (reg_mentioned_p (frame_pointer_rtx
, op
)
13084 || reg_mentioned_p (arg_pointer_rtx
, op
)
13085 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13086 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13087 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13088 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13091 /* Constants are converted into offsets from labels. */
13095 ind
= XEXP (op
, 0);
13097 if (reload_completed
13098 && (GET_CODE (ind
) == LABEL_REF
13099 || (GET_CODE (ind
) == CONST
13100 && GET_CODE (XEXP (ind
, 0)) == PLUS
13101 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13102 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13105 /* Match: (mem (reg)). */
13107 return arm_address_register_rtx_p (ind
, 0);
13109 /* Allow post-increment with Neon registers. */
13110 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
13111 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
13112 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13114 /* Allow post-increment by register for VLDn */
13115 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
13116 && GET_CODE (XEXP (ind
, 1)) == PLUS
13117 && REG_P (XEXP (XEXP (ind
, 1), 1)))
13124 && GET_CODE (ind
) == PLUS
13125 && REG_P (XEXP (ind
, 0))
13126 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13127 && CONST_INT_P (XEXP (ind
, 1))
13128 && INTVAL (XEXP (ind
, 1)) > -1024
13129 /* For quad modes, we restrict the constant offset to be slightly less
13130 than what the instruction format permits. We have no such constraint
13131 on double mode offsets. (This must match arm_legitimate_index_p.) */
13132 && (INTVAL (XEXP (ind
, 1))
13133 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
13134 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13140 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13143 neon_struct_mem_operand (rtx op
)
13147 /* Reject eliminable registers. */
13148 if (! (reload_in_progress
|| reload_completed
)
13149 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13150 || reg_mentioned_p (arg_pointer_rtx
, op
)
13151 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13152 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13153 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13154 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13157 /* Constants are converted into offsets from labels. */
13161 ind
= XEXP (op
, 0);
13163 if (reload_completed
13164 && (GET_CODE (ind
) == LABEL_REF
13165 || (GET_CODE (ind
) == CONST
13166 && GET_CODE (XEXP (ind
, 0)) == PLUS
13167 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13168 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13171 /* Match: (mem (reg)). */
13173 return arm_address_register_rtx_p (ind
, 0);
13175 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13176 if (GET_CODE (ind
) == POST_INC
13177 || GET_CODE (ind
) == PRE_DEC
)
13178 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13183 /* Return true if X is a register that will be eliminated later on. */
13185 arm_eliminable_register (rtx x
)
13187 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
13188 || REGNO (x
) == ARG_POINTER_REGNUM
13189 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
13190 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
13193 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13194 coprocessor registers. Otherwise return NO_REGS. */
13197 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13199 if (mode
== HFmode
)
13201 if (!TARGET_NEON_FP16
)
13202 return GENERAL_REGS
;
13203 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13205 return GENERAL_REGS
;
13208 /* The neon move patterns handle all legitimate vector and struct
13211 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13212 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13213 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13214 || VALID_NEON_STRUCT_MODE (mode
)))
13217 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13220 return GENERAL_REGS
;
13223 /* Values which must be returned in the most-significant end of the return
13227 arm_return_in_msb (const_tree valtype
)
13229 return (TARGET_AAPCS_BASED
13230 && BYTES_BIG_ENDIAN
13231 && (AGGREGATE_TYPE_P (valtype
)
13232 || TREE_CODE (valtype
) == COMPLEX_TYPE
13233 || FIXED_POINT_TYPE_P (valtype
)));
13236 /* Return TRUE if X references a SYMBOL_REF. */
13238 symbol_mentioned_p (rtx x
)
13243 if (GET_CODE (x
) == SYMBOL_REF
)
13246 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13247 are constant offsets, not symbols. */
13248 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13251 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13253 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13259 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13260 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
13263 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
13270 /* Return TRUE if X references a LABEL_REF. */
13272 label_mentioned_p (rtx x
)
13277 if (GET_CODE (x
) == LABEL_REF
)
13280 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13281 instruction, but they are constant offsets, not symbols. */
13282 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13285 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13286 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13292 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13293 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
13296 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
13304 tls_mentioned_p (rtx x
)
13306 switch (GET_CODE (x
))
13309 return tls_mentioned_p (XEXP (x
, 0));
13312 if (XINT (x
, 1) == UNSPEC_TLS
)
13320 /* Must not copy any rtx that uses a pc-relative address.
13321 Also, disallow copying of load-exclusive instructions that
13322 may appear after splitting of compare-and-swap-style operations
13323 so as to prevent those loops from being transformed away from their
13324 canonical forms (see PR 69904). */
13327 arm_cannot_copy_insn_p (rtx_insn
*insn
)
13329 /* The tls call insn cannot be copied, as it is paired with a data
13331 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13334 subrtx_iterator::array_type array
;
13335 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
13337 const_rtx x
= *iter
;
13338 if (GET_CODE (x
) == UNSPEC
13339 && (XINT (x
, 1) == UNSPEC_PIC_BASE
13340 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
13344 rtx set
= single_set (insn
);
13347 rtx src
= SET_SRC (set
);
13348 if (GET_CODE (src
) == ZERO_EXTEND
)
13349 src
= XEXP (src
, 0);
13351 /* Catch the load-exclusive and load-acquire operations. */
13352 if (GET_CODE (src
) == UNSPEC_VOLATILE
13353 && (XINT (src
, 1) == VUNSPEC_LL
13354 || XINT (src
, 1) == VUNSPEC_LAX
))
13361 minmax_code (rtx x
)
13363 enum rtx_code code
= GET_CODE (x
);
13376 gcc_unreachable ();
13380 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13383 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13384 int *mask
, bool *signed_sat
)
13386 /* The high bound must be a power of two minus one. */
13387 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13391 /* The low bound is either zero (for usat) or one less than the
13392 negation of the high bound (for ssat). */
13393 if (INTVAL (lo_bound
) == 0)
13398 *signed_sat
= false;
13403 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13408 *signed_sat
= true;
13416 /* Return 1 if memory locations are adjacent. */
13418 adjacent_mem_locations (rtx a
, rtx b
)
13420 /* We don't guarantee to preserve the order of these memory refs. */
13421 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13424 if ((REG_P (XEXP (a
, 0))
13425 || (GET_CODE (XEXP (a
, 0)) == PLUS
13426 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13427 && (REG_P (XEXP (b
, 0))
13428 || (GET_CODE (XEXP (b
, 0)) == PLUS
13429 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13431 HOST_WIDE_INT val0
= 0, val1
= 0;
13435 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13437 reg0
= XEXP (XEXP (a
, 0), 0);
13438 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13441 reg0
= XEXP (a
, 0);
13443 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13445 reg1
= XEXP (XEXP (b
, 0), 0);
13446 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13449 reg1
= XEXP (b
, 0);
13451 /* Don't accept any offset that will require multiple
13452 instructions to handle, since this would cause the
13453 arith_adjacentmem pattern to output an overlong sequence. */
13454 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13457 /* Don't allow an eliminable register: register elimination can make
13458 the offset too large. */
13459 if (arm_eliminable_register (reg0
))
13462 val_diff
= val1
- val0
;
13466 /* If the target has load delay slots, then there's no benefit
13467 to using an ldm instruction unless the offset is zero and
13468 we are optimizing for size. */
13469 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13470 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13471 && (val_diff
== 4 || val_diff
== -4));
13474 return ((REGNO (reg0
) == REGNO (reg1
))
13475 && (val_diff
== 4 || val_diff
== -4));
13481 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13482 for load operations, false for store operations. CONSECUTIVE is true
13483 if the register numbers in the operation must be consecutive in the register
13484 bank. RETURN_PC is true if value is to be loaded in PC.
13485 The pattern we are trying to match for load is:
13486 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13487 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13490 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13493 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13494 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13495 3. If consecutive is TRUE, then for kth register being loaded,
13496 REGNO (R_dk) = REGNO (R_d0) + k.
13497 The pattern for store is similar. */
13499 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
13500 bool consecutive
, bool return_pc
)
13502 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13503 rtx reg
, mem
, addr
;
13505 unsigned first_regno
;
13506 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13508 bool addr_reg_in_reglist
= false;
13509 bool update
= false;
13514 /* If not in SImode, then registers must be consecutive
13515 (e.g., VLDM instructions for DFmode). */
13516 gcc_assert ((mode
== SImode
) || consecutive
);
13517 /* Setting return_pc for stores is illegal. */
13518 gcc_assert (!return_pc
|| load
);
13520 /* Set up the increments and the regs per val based on the mode. */
13521 reg_increment
= GET_MODE_SIZE (mode
);
13522 regs_per_val
= reg_increment
/ 4;
13523 offset_adj
= return_pc
? 1 : 0;
13526 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13527 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13530 /* Check if this is a write-back. */
13531 elt
= XVECEXP (op
, 0, offset_adj
);
13532 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13538 /* The offset adjustment must be the number of registers being
13539 popped times the size of a single register. */
13540 if (!REG_P (SET_DEST (elt
))
13541 || !REG_P (XEXP (SET_SRC (elt
), 0))
13542 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13543 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13544 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13545 ((count
- 1 - offset_adj
) * reg_increment
))
13549 i
= i
+ offset_adj
;
13550 base
= base
+ offset_adj
;
13551 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13552 success depends on the type: VLDM can do just one reg,
13553 LDM must do at least two. */
13554 if ((count
<= i
) && (mode
== SImode
))
13557 elt
= XVECEXP (op
, 0, i
- 1);
13558 if (GET_CODE (elt
) != SET
)
13563 reg
= SET_DEST (elt
);
13564 mem
= SET_SRC (elt
);
13568 reg
= SET_SRC (elt
);
13569 mem
= SET_DEST (elt
);
13572 if (!REG_P (reg
) || !MEM_P (mem
))
13575 regno
= REGNO (reg
);
13576 first_regno
= regno
;
13577 addr
= XEXP (mem
, 0);
13578 if (GET_CODE (addr
) == PLUS
)
13580 if (!CONST_INT_P (XEXP (addr
, 1)))
13583 offset
= INTVAL (XEXP (addr
, 1));
13584 addr
= XEXP (addr
, 0);
13590 /* Don't allow SP to be loaded unless it is also the base register. It
13591 guarantees that SP is reset correctly when an LDM instruction
13592 is interrupted. Otherwise, we might end up with a corrupt stack. */
13593 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13596 for (; i
< count
; i
++)
13598 elt
= XVECEXP (op
, 0, i
);
13599 if (GET_CODE (elt
) != SET
)
13604 reg
= SET_DEST (elt
);
13605 mem
= SET_SRC (elt
);
13609 reg
= SET_SRC (elt
);
13610 mem
= SET_DEST (elt
);
13614 || GET_MODE (reg
) != mode
13615 || REGNO (reg
) <= regno
13618 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13619 /* Don't allow SP to be loaded unless it is also the base register. It
13620 guarantees that SP is reset correctly when an LDM instruction
13621 is interrupted. Otherwise, we might end up with a corrupt stack. */
13622 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13624 || GET_MODE (mem
) != mode
13625 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13626 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13627 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13628 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13629 offset
+ (i
- base
) * reg_increment
))
13630 && (!REG_P (XEXP (mem
, 0))
13631 || offset
+ (i
- base
) * reg_increment
!= 0)))
13634 regno
= REGNO (reg
);
13635 if (regno
== REGNO (addr
))
13636 addr_reg_in_reglist
= true;
13641 if (update
&& addr_reg_in_reglist
)
13644 /* For Thumb-1, address register is always modified - either by write-back
13645 or by explicit load. If the pattern does not describe an update,
13646 then the address register must be in the list of loaded registers. */
13648 return update
|| addr_reg_in_reglist
;
13654 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13655 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13656 instruction. ADD_OFFSET is nonzero if the base address register needs
13657 to be modified with an add instruction before we can use it. */
13660 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13661 int nops
, HOST_WIDE_INT add_offset
)
13663 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13664 if the offset isn't small enough. The reason 2 ldrs are faster
13665 is because these ARMs are able to do more than one cache access
13666 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13667 whilst the ARM8 has a double bandwidth cache. This means that
13668 these cores can do both an instruction fetch and a data fetch in
13669 a single cycle, so the trick of calculating the address into a
13670 scratch register (one of the result regs) and then doing a load
13671 multiple actually becomes slower (and no smaller in code size).
13672 That is the transformation
13674 ldr rd1, [rbase + offset]
13675 ldr rd2, [rbase + offset + 4]
13679 add rd1, rbase, offset
13680 ldmia rd1, {rd1, rd2}
13682 produces worse code -- '3 cycles + any stalls on rd2' instead of
13683 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13684 access per cycle, the first sequence could never complete in less
13685 than 6 cycles, whereas the ldm sequence would only take 5 and
13686 would make better use of sequential accesses if not hitting the
13689 We cheat here and test 'arm_ld_sched' which we currently know to
13690 only be true for the ARM8, ARM9 and StrongARM. If this ever
13691 changes, then the test below needs to be reworked. */
13692 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13695 /* XScale has load-store double instructions, but they have stricter
13696 alignment requirements than load-store multiple, so we cannot
13699 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13700 the pipeline until completion.
13708 An ldr instruction takes 1-3 cycles, but does not block the
13717 Best case ldr will always win. However, the more ldr instructions
13718 we issue, the less likely we are to be able to schedule them well.
13719 Using ldr instructions also increases code size.
13721 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13722 for counts of 3 or 4 regs. */
13723 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13728 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13729 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13730 an array ORDER which describes the sequence to use when accessing the
13731 offsets that produces an ascending order. In this sequence, each
13732 offset must be larger by exactly 4 than the previous one. ORDER[0]
13733 must have been filled in with the lowest offset by the caller.
13734 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13735 we use to verify that ORDER produces an ascending order of registers.
13736 Return true if it was possible to construct such an order, false if
13740 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13741 int *unsorted_regs
)
13744 for (i
= 1; i
< nops
; i
++)
13748 order
[i
] = order
[i
- 1];
13749 for (j
= 0; j
< nops
; j
++)
13750 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13752 /* We must find exactly one offset that is higher than the
13753 previous one by 4. */
13754 if (order
[i
] != order
[i
- 1])
13758 if (order
[i
] == order
[i
- 1])
13760 /* The register numbers must be ascending. */
13761 if (unsorted_regs
!= NULL
13762 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13768 /* Used to determine in a peephole whether a sequence of load
13769 instructions can be changed into a load-multiple instruction.
13770 NOPS is the number of separate load instructions we are examining. The
13771 first NOPS entries in OPERANDS are the destination registers, the
13772 next NOPS entries are memory operands. If this function is
13773 successful, *BASE is set to the common base register of the memory
13774 accesses; *LOAD_OFFSET is set to the first memory location's offset
13775 from that base register.
13776 REGS is an array filled in with the destination register numbers.
13777 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13778 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13779 the sequence of registers in REGS matches the loads from ascending memory
13780 locations, and the function verifies that the register numbers are
13781 themselves ascending. If CHECK_REGS is false, the register numbers
13782 are stored in the order they are found in the operands. */
13784 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13785 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13787 int unsorted_regs
[MAX_LDM_STM_OPS
];
13788 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13789 int order
[MAX_LDM_STM_OPS
];
13790 rtx base_reg_rtx
= NULL
;
13794 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13795 easily extended if required. */
13796 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13798 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13800 /* Loop over the operands and check that the memory references are
13801 suitable (i.e. immediate offsets from the same base register). At
13802 the same time, extract the target register, and the memory
13804 for (i
= 0; i
< nops
; i
++)
13809 /* Convert a subreg of a mem into the mem itself. */
13810 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13811 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13813 gcc_assert (MEM_P (operands
[nops
+ i
]));
13815 /* Don't reorder volatile memory references; it doesn't seem worth
13816 looking for the case where the order is ok anyway. */
13817 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13820 offset
= const0_rtx
;
13822 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13823 || (GET_CODE (reg
) == SUBREG
13824 && REG_P (reg
= SUBREG_REG (reg
))))
13825 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13826 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13827 || (GET_CODE (reg
) == SUBREG
13828 && REG_P (reg
= SUBREG_REG (reg
))))
13829 && (CONST_INT_P (offset
13830 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13834 base_reg
= REGNO (reg
);
13835 base_reg_rtx
= reg
;
13836 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13839 else if (base_reg
!= (int) REGNO (reg
))
13840 /* Not addressed from the same base register. */
13843 unsorted_regs
[i
] = (REG_P (operands
[i
])
13844 ? REGNO (operands
[i
])
13845 : REGNO (SUBREG_REG (operands
[i
])));
13847 /* If it isn't an integer register, or if it overwrites the
13848 base register but isn't the last insn in the list, then
13849 we can't do this. */
13850 if (unsorted_regs
[i
] < 0
13851 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13852 || unsorted_regs
[i
] > 14
13853 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13856 /* Don't allow SP to be loaded unless it is also the base
13857 register. It guarantees that SP is reset correctly when
13858 an LDM instruction is interrupted. Otherwise, we might
13859 end up with a corrupt stack. */
13860 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13863 unsorted_offsets
[i
] = INTVAL (offset
);
13864 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13868 /* Not a suitable memory address. */
13872 /* All the useful information has now been extracted from the
13873 operands into unsorted_regs and unsorted_offsets; additionally,
13874 order[0] has been set to the lowest offset in the list. Sort
13875 the offsets into order, verifying that they are adjacent, and
13876 check that the register numbers are ascending. */
13877 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13878 check_regs
? unsorted_regs
: NULL
))
13882 memcpy (saved_order
, order
, sizeof order
);
13888 for (i
= 0; i
< nops
; i
++)
13889 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13891 *load_offset
= unsorted_offsets
[order
[0]];
13895 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13898 if (unsorted_offsets
[order
[0]] == 0)
13899 ldm_case
= 1; /* ldmia */
13900 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13901 ldm_case
= 2; /* ldmib */
13902 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13903 ldm_case
= 3; /* ldmda */
13904 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13905 ldm_case
= 4; /* ldmdb */
13906 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13907 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13912 if (!multiple_operation_profitable_p (false, nops
,
13914 ? unsorted_offsets
[order
[0]] : 0))
13920 /* Used to determine in a peephole whether a sequence of store instructions can
13921 be changed into a store-multiple instruction.
13922 NOPS is the number of separate store instructions we are examining.
13923 NOPS_TOTAL is the total number of instructions recognized by the peephole
13925 The first NOPS entries in OPERANDS are the source registers, the next
13926 NOPS entries are memory operands. If this function is successful, *BASE is
13927 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13928 to the first memory location's offset from that base register. REGS is an
13929 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13930 likewise filled with the corresponding rtx's.
13931 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13932 numbers to an ascending order of stores.
13933 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13934 from ascending memory locations, and the function verifies that the register
13935 numbers are themselves ascending. If CHECK_REGS is false, the register
13936 numbers are stored in the order they are found in the operands. */
13938 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13939 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13940 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13942 int unsorted_regs
[MAX_LDM_STM_OPS
];
13943 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13944 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13945 int order
[MAX_LDM_STM_OPS
];
13947 rtx base_reg_rtx
= NULL
;
13950 /* Write back of base register is currently only supported for Thumb 1. */
13951 int base_writeback
= TARGET_THUMB1
;
13953 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13954 easily extended if required. */
13955 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13957 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13959 /* Loop over the operands and check that the memory references are
13960 suitable (i.e. immediate offsets from the same base register). At
13961 the same time, extract the target register, and the memory
13963 for (i
= 0; i
< nops
; i
++)
13968 /* Convert a subreg of a mem into the mem itself. */
13969 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13970 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13972 gcc_assert (MEM_P (operands
[nops
+ i
]));
13974 /* Don't reorder volatile memory references; it doesn't seem worth
13975 looking for the case where the order is ok anyway. */
13976 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13979 offset
= const0_rtx
;
13981 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13982 || (GET_CODE (reg
) == SUBREG
13983 && REG_P (reg
= SUBREG_REG (reg
))))
13984 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13985 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13986 || (GET_CODE (reg
) == SUBREG
13987 && REG_P (reg
= SUBREG_REG (reg
))))
13988 && (CONST_INT_P (offset
13989 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13991 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13992 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13993 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13997 base_reg
= REGNO (reg
);
13998 base_reg_rtx
= reg
;
13999 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
14002 else if (base_reg
!= (int) REGNO (reg
))
14003 /* Not addressed from the same base register. */
14006 /* If it isn't an integer register, then we can't do this. */
14007 if (unsorted_regs
[i
] < 0
14008 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
14009 /* The effects are unpredictable if the base register is
14010 both updated and stored. */
14011 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
14012 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
14013 || unsorted_regs
[i
] > 14)
14016 unsorted_offsets
[i
] = INTVAL (offset
);
14017 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
14021 /* Not a suitable memory address. */
14025 /* All the useful information has now been extracted from the
14026 operands into unsorted_regs and unsorted_offsets; additionally,
14027 order[0] has been set to the lowest offset in the list. Sort
14028 the offsets into order, verifying that they are adjacent, and
14029 check that the register numbers are ascending. */
14030 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
14031 check_regs
? unsorted_regs
: NULL
))
14035 memcpy (saved_order
, order
, sizeof order
);
14041 for (i
= 0; i
< nops
; i
++)
14043 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
14045 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
14048 *load_offset
= unsorted_offsets
[order
[0]];
14052 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
14055 if (unsorted_offsets
[order
[0]] == 0)
14056 stm_case
= 1; /* stmia */
14057 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
14058 stm_case
= 2; /* stmib */
14059 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
14060 stm_case
= 3; /* stmda */
14061 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
14062 stm_case
= 4; /* stmdb */
14066 if (!multiple_operation_profitable_p (false, nops
, 0))
14072 /* Routines for use in generating RTL. */
14074 /* Generate a load-multiple instruction. COUNT is the number of loads in
14075 the instruction; REGS and MEMS are arrays containing the operands.
14076 BASEREG is the base register to be used in addressing the memory operands.
14077 WBACK_OFFSET is nonzero if the instruction should update the base
14081 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14082 HOST_WIDE_INT wback_offset
)
14087 if (!multiple_operation_profitable_p (false, count
, 0))
14093 for (i
= 0; i
< count
; i
++)
14094 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
14096 if (wback_offset
!= 0)
14097 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14099 seq
= get_insns ();
14105 result
= gen_rtx_PARALLEL (VOIDmode
,
14106 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14107 if (wback_offset
!= 0)
14109 XVECEXP (result
, 0, 0)
14110 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14115 for (j
= 0; i
< count
; i
++, j
++)
14116 XVECEXP (result
, 0, i
)
14117 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
14122 /* Generate a store-multiple instruction. COUNT is the number of stores in
14123 the instruction; REGS and MEMS are arrays containing the operands.
14124 BASEREG is the base register to be used in addressing the memory operands.
14125 WBACK_OFFSET is nonzero if the instruction should update the base
14129 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14130 HOST_WIDE_INT wback_offset
)
14135 if (GET_CODE (basereg
) == PLUS
)
14136 basereg
= XEXP (basereg
, 0);
14138 if (!multiple_operation_profitable_p (false, count
, 0))
14144 for (i
= 0; i
< count
; i
++)
14145 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
14147 if (wback_offset
!= 0)
14148 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14150 seq
= get_insns ();
14156 result
= gen_rtx_PARALLEL (VOIDmode
,
14157 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14158 if (wback_offset
!= 0)
14160 XVECEXP (result
, 0, 0)
14161 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14166 for (j
= 0; i
< count
; i
++, j
++)
14167 XVECEXP (result
, 0, i
)
14168 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
14173 /* Generate either a load-multiple or a store-multiple instruction. This
14174 function can be used in situations where we can start with a single MEM
14175 rtx and adjust its address upwards.
14176 COUNT is the number of operations in the instruction, not counting a
14177 possible update of the base register. REGS is an array containing the
14179 BASEREG is the base register to be used in addressing the memory operands,
14180 which are constructed from BASEMEM.
14181 WRITE_BACK specifies whether the generated instruction should include an
14182 update of the base register.
14183 OFFSETP is used to pass an offset to and from this function; this offset
14184 is not used when constructing the address (instead BASEMEM should have an
14185 appropriate offset in its address), it is used only for setting
14186 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14189 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
14190 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
14192 rtx mems
[MAX_LDM_STM_OPS
];
14193 HOST_WIDE_INT offset
= *offsetp
;
14196 gcc_assert (count
<= MAX_LDM_STM_OPS
);
14198 if (GET_CODE (basereg
) == PLUS
)
14199 basereg
= XEXP (basereg
, 0);
14201 for (i
= 0; i
< count
; i
++)
14203 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
14204 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
14212 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
14213 write_back
? 4 * count
: 0);
14215 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
14216 write_back
? 4 * count
: 0);
14220 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14221 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14223 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
14228 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14229 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14231 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
14235 /* Called from a peephole2 expander to turn a sequence of loads into an
14236 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14237 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14238 is true if we can reorder the registers because they are used commutatively
14240 Returns true iff we could generate a new instruction. */
14243 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
14245 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14246 rtx mems
[MAX_LDM_STM_OPS
];
14247 int i
, j
, base_reg
;
14249 HOST_WIDE_INT offset
;
14250 int write_back
= FALSE
;
14254 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
14255 &base_reg
, &offset
, !sort_regs
);
14261 for (i
= 0; i
< nops
- 1; i
++)
14262 for (j
= i
+ 1; j
< nops
; j
++)
14263 if (regs
[i
] > regs
[j
])
14269 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14273 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
14274 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
14280 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
14281 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
14283 if (!TARGET_THUMB1
)
14285 base_reg
= regs
[0];
14286 base_reg_rtx
= newbase
;
14290 for (i
= 0; i
< nops
; i
++)
14292 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14293 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14296 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14297 write_back
? offset
+ i
* 4 : 0));
14301 /* Called from a peephole2 expander to turn a sequence of stores into an
14302 STM instruction. OPERANDS are the operands found by the peephole matcher;
14303 NOPS indicates how many separate stores we are trying to combine.
14304 Returns true iff we could generate a new instruction. */
14307 gen_stm_seq (rtx
*operands
, int nops
)
14310 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14311 rtx mems
[MAX_LDM_STM_OPS
];
14314 HOST_WIDE_INT offset
;
14315 int write_back
= FALSE
;
14318 bool base_reg_dies
;
14320 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
14321 mem_order
, &base_reg
, &offset
, true);
14326 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14328 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
14331 gcc_assert (base_reg_dies
);
14337 gcc_assert (base_reg_dies
);
14338 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14342 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14344 for (i
= 0; i
< nops
; i
++)
14346 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14347 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14350 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14351 write_back
? offset
+ i
* 4 : 0));
14355 /* Called from a peephole2 expander to turn a sequence of stores that are
14356 preceded by constant loads into an STM instruction. OPERANDS are the
14357 operands found by the peephole matcher; NOPS indicates how many
14358 separate stores we are trying to combine; there are 2 * NOPS
14359 instructions in the peephole.
14360 Returns true iff we could generate a new instruction. */
14363 gen_const_stm_seq (rtx
*operands
, int nops
)
14365 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14366 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14367 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14368 rtx mems
[MAX_LDM_STM_OPS
];
14371 HOST_WIDE_INT offset
;
14372 int write_back
= FALSE
;
14375 bool base_reg_dies
;
14377 HARD_REG_SET allocated
;
14379 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14380 mem_order
, &base_reg
, &offset
, false);
14385 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14387 /* If the same register is used more than once, try to find a free
14389 CLEAR_HARD_REG_SET (allocated
);
14390 for (i
= 0; i
< nops
; i
++)
14392 for (j
= i
+ 1; j
< nops
; j
++)
14393 if (regs
[i
] == regs
[j
])
14395 rtx t
= peep2_find_free_register (0, nops
* 2,
14396 TARGET_THUMB1
? "l" : "r",
14397 SImode
, &allocated
);
14401 regs
[i
] = REGNO (t
);
14405 /* Compute an ordering that maps the register numbers to an ascending
14408 for (i
= 0; i
< nops
; i
++)
14409 if (regs
[i
] < regs
[reg_order
[0]])
14412 for (i
= 1; i
< nops
; i
++)
14414 int this_order
= reg_order
[i
- 1];
14415 for (j
= 0; j
< nops
; j
++)
14416 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14417 && (this_order
== reg_order
[i
- 1]
14418 || regs
[j
] < regs
[this_order
]))
14420 reg_order
[i
] = this_order
;
14423 /* Ensure that registers that must be live after the instruction end
14424 up with the correct value. */
14425 for (i
= 0; i
< nops
; i
++)
14427 int this_order
= reg_order
[i
];
14428 if ((this_order
!= mem_order
[i
]
14429 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14430 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14434 /* Load the constants. */
14435 for (i
= 0; i
< nops
; i
++)
14437 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14438 sorted_regs
[i
] = regs
[reg_order
[i
]];
14439 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14442 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14444 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14447 gcc_assert (base_reg_dies
);
14453 gcc_assert (base_reg_dies
);
14454 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14458 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14460 for (i
= 0; i
< nops
; i
++)
14462 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14463 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14466 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14467 write_back
? offset
+ i
* 4 : 0));
14471 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14472 unaligned copies on processors which support unaligned semantics for those
14473 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14474 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14475 An interleave factor of 1 (the minimum) will perform no interleaving.
14476 Load/store multiple are used for aligned addresses where possible. */
14479 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14480 HOST_WIDE_INT length
,
14481 unsigned int interleave_factor
)
14483 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14484 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14485 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14486 HOST_WIDE_INT i
, j
;
14487 HOST_WIDE_INT remaining
= length
, words
;
14488 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14490 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14491 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14492 HOST_WIDE_INT srcoffset
, dstoffset
;
14493 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14496 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14498 /* Use hard registers if we have aligned source or destination so we can use
14499 load/store multiple with contiguous registers. */
14500 if (dst_aligned
|| src_aligned
)
14501 for (i
= 0; i
< interleave_factor
; i
++)
14502 regs
[i
] = gen_rtx_REG (SImode
, i
);
14504 for (i
= 0; i
< interleave_factor
; i
++)
14505 regs
[i
] = gen_reg_rtx (SImode
);
14507 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14508 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14510 srcoffset
= dstoffset
= 0;
14512 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14513 For copying the last bytes we want to subtract this offset again. */
14514 src_autoinc
= dst_autoinc
= 0;
14516 for (i
= 0; i
< interleave_factor
; i
++)
14519 /* Copy BLOCK_SIZE_BYTES chunks. */
14521 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14524 if (src_aligned
&& interleave_factor
> 1)
14526 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14527 TRUE
, srcbase
, &srcoffset
));
14528 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14532 for (j
= 0; j
< interleave_factor
; j
++)
14534 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14536 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14537 srcoffset
+ j
* UNITS_PER_WORD
);
14538 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14540 srcoffset
+= block_size_bytes
;
14544 if (dst_aligned
&& interleave_factor
> 1)
14546 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14547 TRUE
, dstbase
, &dstoffset
));
14548 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14552 for (j
= 0; j
< interleave_factor
; j
++)
14554 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14556 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14557 dstoffset
+ j
* UNITS_PER_WORD
);
14558 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14560 dstoffset
+= block_size_bytes
;
14563 remaining
-= block_size_bytes
;
14566 /* Copy any whole words left (note these aren't interleaved with any
14567 subsequent halfword/byte load/stores in the interests of simplicity). */
14569 words
= remaining
/ UNITS_PER_WORD
;
14571 gcc_assert (words
< interleave_factor
);
14573 if (src_aligned
&& words
> 1)
14575 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14577 src_autoinc
+= UNITS_PER_WORD
* words
;
14581 for (j
= 0; j
< words
; j
++)
14583 addr
= plus_constant (Pmode
, src
,
14584 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14585 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14586 srcoffset
+ j
* UNITS_PER_WORD
);
14588 emit_move_insn (regs
[j
], mem
);
14590 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14592 srcoffset
+= words
* UNITS_PER_WORD
;
14595 if (dst_aligned
&& words
> 1)
14597 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14599 dst_autoinc
+= words
* UNITS_PER_WORD
;
14603 for (j
= 0; j
< words
; j
++)
14605 addr
= plus_constant (Pmode
, dst
,
14606 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14607 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14608 dstoffset
+ j
* UNITS_PER_WORD
);
14610 emit_move_insn (mem
, regs
[j
]);
14612 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14614 dstoffset
+= words
* UNITS_PER_WORD
;
14617 remaining
-= words
* UNITS_PER_WORD
;
14619 gcc_assert (remaining
< 4);
14621 /* Copy a halfword if necessary. */
14623 if (remaining
>= 2)
14625 halfword_tmp
= gen_reg_rtx (SImode
);
14627 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14628 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14629 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14631 /* Either write out immediately, or delay until we've loaded the last
14632 byte, depending on interleave factor. */
14633 if (interleave_factor
== 1)
14635 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14636 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14637 emit_insn (gen_unaligned_storehi (mem
,
14638 gen_lowpart (HImode
, halfword_tmp
)));
14639 halfword_tmp
= NULL
;
14647 gcc_assert (remaining
< 2);
14649 /* Copy last byte. */
14651 if ((remaining
& 1) != 0)
14653 byte_tmp
= gen_reg_rtx (SImode
);
14655 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14656 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14657 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14659 if (interleave_factor
== 1)
14661 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14662 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14663 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14672 /* Store last halfword if we haven't done so already. */
14676 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14677 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14678 emit_insn (gen_unaligned_storehi (mem
,
14679 gen_lowpart (HImode
, halfword_tmp
)));
14683 /* Likewise for last byte. */
14687 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14688 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14689 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14693 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14696 /* From mips_adjust_block_mem:
14698 Helper function for doing a loop-based block operation on memory
14699 reference MEM. Each iteration of the loop will operate on LENGTH
14702 Create a new base register for use within the loop and point it to
14703 the start of MEM. Create a new memory reference that uses this
14704 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14707 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14710 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14712 /* Although the new mem does not refer to a known location,
14713 it does keep up to LENGTH bytes of alignment. */
14714 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14715 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14718 /* From mips_block_move_loop:
14720 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14721 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14722 the memory regions do not overlap. */
14725 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14726 unsigned int interleave_factor
,
14727 HOST_WIDE_INT bytes_per_iter
)
14729 rtx src_reg
, dest_reg
, final_src
, test
;
14730 HOST_WIDE_INT leftover
;
14732 leftover
= length
% bytes_per_iter
;
14733 length
-= leftover
;
14735 /* Create registers and memory references for use within the loop. */
14736 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14737 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14739 /* Calculate the value that SRC_REG should have after the last iteration of
14741 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14742 0, 0, OPTAB_WIDEN
);
14744 /* Emit the start of the loop. */
14745 rtx_code_label
*label
= gen_label_rtx ();
14746 emit_label (label
);
14748 /* Emit the loop body. */
14749 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14750 interleave_factor
);
14752 /* Move on to the next block. */
14753 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14754 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14756 /* Emit the loop condition. */
14757 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14758 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14760 /* Mop up any left-over bytes. */
14762 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14765 /* Emit a block move when either the source or destination is unaligned (not
14766 aligned to a four-byte boundary). This may need further tuning depending on
14767 core type, optimize_size setting, etc. */
14770 arm_movmemqi_unaligned (rtx
*operands
)
14772 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14776 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14777 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14778 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14779 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14780 or dst_aligned though: allow more interleaving in those cases since the
14781 resulting code can be smaller. */
14782 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14783 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14786 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14787 interleave_factor
, bytes_per_iter
);
14789 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14790 interleave_factor
);
14794 /* Note that the loop created by arm_block_move_unaligned_loop may be
14795 subject to loop unrolling, which makes tuning this condition a little
14798 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14800 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14807 arm_gen_movmemqi (rtx
*operands
)
14809 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14810 HOST_WIDE_INT srcoffset
, dstoffset
;
14812 rtx src
, dst
, srcbase
, dstbase
;
14813 rtx part_bytes_reg
= NULL
;
14816 if (!CONST_INT_P (operands
[2])
14817 || !CONST_INT_P (operands
[3])
14818 || INTVAL (operands
[2]) > 64)
14821 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14822 return arm_movmemqi_unaligned (operands
);
14824 if (INTVAL (operands
[3]) & 3)
14827 dstbase
= operands
[0];
14828 srcbase
= operands
[1];
14830 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14831 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14833 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14834 out_words_to_go
= INTVAL (operands
[2]) / 4;
14835 last_bytes
= INTVAL (operands
[2]) & 3;
14836 dstoffset
= srcoffset
= 0;
14838 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14839 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14841 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14843 if (in_words_to_go
> 4)
14844 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14845 TRUE
, srcbase
, &srcoffset
));
14847 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14848 src
, FALSE
, srcbase
,
14851 if (out_words_to_go
)
14853 if (out_words_to_go
> 4)
14854 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14855 TRUE
, dstbase
, &dstoffset
));
14856 else if (out_words_to_go
!= 1)
14857 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14858 out_words_to_go
, dst
,
14861 dstbase
, &dstoffset
));
14864 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14865 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14866 if (last_bytes
!= 0)
14868 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14874 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14875 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14878 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14879 if (out_words_to_go
)
14883 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14884 sreg
= copy_to_reg (mem
);
14886 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14887 emit_move_insn (mem
, sreg
);
14890 gcc_assert (!in_words_to_go
); /* Sanity check */
14893 if (in_words_to_go
)
14895 gcc_assert (in_words_to_go
> 0);
14897 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14898 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14901 gcc_assert (!last_bytes
|| part_bytes_reg
);
14903 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14905 rtx tmp
= gen_reg_rtx (SImode
);
14907 /* The bytes we want are in the top end of the word. */
14908 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14909 GEN_INT (8 * (4 - last_bytes
))));
14910 part_bytes_reg
= tmp
;
14914 mem
= adjust_automodify_address (dstbase
, QImode
,
14915 plus_constant (Pmode
, dst
,
14917 dstoffset
+ last_bytes
- 1);
14918 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14922 tmp
= gen_reg_rtx (SImode
);
14923 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14924 part_bytes_reg
= tmp
;
14931 if (last_bytes
> 1)
14933 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14934 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14938 rtx tmp
= gen_reg_rtx (SImode
);
14939 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14940 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14941 part_bytes_reg
= tmp
;
14948 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14949 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14956 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14959 next_consecutive_mem (rtx mem
)
14961 machine_mode mode
= GET_MODE (mem
);
14962 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14963 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14965 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14968 /* Copy using LDRD/STRD instructions whenever possible.
14969 Returns true upon success. */
14971 gen_movmem_ldrd_strd (rtx
*operands
)
14973 unsigned HOST_WIDE_INT len
;
14974 HOST_WIDE_INT align
;
14975 rtx src
, dst
, base
;
14977 bool src_aligned
, dst_aligned
;
14978 bool src_volatile
, dst_volatile
;
14980 gcc_assert (CONST_INT_P (operands
[2]));
14981 gcc_assert (CONST_INT_P (operands
[3]));
14983 len
= UINTVAL (operands
[2]);
14987 /* Maximum alignment we can assume for both src and dst buffers. */
14988 align
= INTVAL (operands
[3]);
14990 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14993 /* Place src and dst addresses in registers
14994 and update the corresponding mem rtx. */
14996 dst_volatile
= MEM_VOLATILE_P (dst
);
14997 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14998 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14999 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
15002 src_volatile
= MEM_VOLATILE_P (src
);
15003 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
15004 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
15005 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
15007 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
15010 if (src_volatile
|| dst_volatile
)
15013 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15014 if (!(dst_aligned
|| src_aligned
))
15015 return arm_gen_movmemqi (operands
);
15017 /* If the either src or dst is unaligned we'll be accessing it as pairs
15018 of unaligned SImode accesses. Otherwise we can generate DImode
15019 ldrd/strd instructions. */
15020 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
15021 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
15026 reg0
= gen_reg_rtx (DImode
);
15027 rtx low_reg
= NULL_RTX
;
15028 rtx hi_reg
= NULL_RTX
;
15030 if (!src_aligned
|| !dst_aligned
)
15032 low_reg
= gen_lowpart (SImode
, reg0
);
15033 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
15036 emit_move_insn (reg0
, src
);
15039 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
15040 src
= next_consecutive_mem (src
);
15041 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
15045 emit_move_insn (dst
, reg0
);
15048 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
15049 dst
= next_consecutive_mem (dst
);
15050 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
15053 src
= next_consecutive_mem (src
);
15054 dst
= next_consecutive_mem (dst
);
15057 gcc_assert (len
< 8);
15060 /* More than a word but less than a double-word to copy. Copy a word. */
15061 reg0
= gen_reg_rtx (SImode
);
15062 src
= adjust_address (src
, SImode
, 0);
15063 dst
= adjust_address (dst
, SImode
, 0);
15065 emit_move_insn (reg0
, src
);
15067 emit_insn (gen_unaligned_loadsi (reg0
, src
));
15070 emit_move_insn (dst
, reg0
);
15072 emit_insn (gen_unaligned_storesi (dst
, reg0
));
15074 src
= next_consecutive_mem (src
);
15075 dst
= next_consecutive_mem (dst
);
15082 /* Copy the remaining bytes. */
15085 dst
= adjust_address (dst
, HImode
, 0);
15086 src
= adjust_address (src
, HImode
, 0);
15087 reg0
= gen_reg_rtx (SImode
);
15089 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
15091 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
15094 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
15096 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
15098 src
= next_consecutive_mem (src
);
15099 dst
= next_consecutive_mem (dst
);
15104 dst
= adjust_address (dst
, QImode
, 0);
15105 src
= adjust_address (src
, QImode
, 0);
15106 reg0
= gen_reg_rtx (QImode
);
15107 emit_move_insn (reg0
, src
);
15108 emit_move_insn (dst
, reg0
);
15112 /* Select a dominance comparison mode if possible for a test of the general
15113 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15114 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15115 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15116 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15117 In all cases OP will be either EQ or NE, but we don't need to know which
15118 here. If we are unable to support a dominance comparison we return
15119 CC mode. This will then fail to match for the RTL expressions that
15120 generate this call. */
15122 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
15124 enum rtx_code cond1
, cond2
;
15127 /* Currently we will probably get the wrong result if the individual
15128 comparisons are not simple. This also ensures that it is safe to
15129 reverse a comparison if necessary. */
15130 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
15132 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
15136 /* The if_then_else variant of this tests the second condition if the
15137 first passes, but is true if the first fails. Reverse the first
15138 condition to get a true "inclusive-or" expression. */
15139 if (cond_or
== DOM_CC_NX_OR_Y
)
15140 cond1
= reverse_condition (cond1
);
15142 /* If the comparisons are not equal, and one doesn't dominate the other,
15143 then we can't do this. */
15145 && !comparison_dominates_p (cond1
, cond2
)
15146 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
15150 std::swap (cond1
, cond2
);
15155 if (cond_or
== DOM_CC_X_AND_Y
)
15160 case EQ
: return CC_DEQmode
;
15161 case LE
: return CC_DLEmode
;
15162 case LEU
: return CC_DLEUmode
;
15163 case GE
: return CC_DGEmode
;
15164 case GEU
: return CC_DGEUmode
;
15165 default: gcc_unreachable ();
15169 if (cond_or
== DOM_CC_X_AND_Y
)
15181 gcc_unreachable ();
15185 if (cond_or
== DOM_CC_X_AND_Y
)
15197 gcc_unreachable ();
15201 if (cond_or
== DOM_CC_X_AND_Y
)
15202 return CC_DLTUmode
;
15207 return CC_DLTUmode
;
15209 return CC_DLEUmode
;
15213 gcc_unreachable ();
15217 if (cond_or
== DOM_CC_X_AND_Y
)
15218 return CC_DGTUmode
;
15223 return CC_DGTUmode
;
15225 return CC_DGEUmode
;
15229 gcc_unreachable ();
15232 /* The remaining cases only occur when both comparisons are the
15235 gcc_assert (cond1
== cond2
);
15239 gcc_assert (cond1
== cond2
);
15243 gcc_assert (cond1
== cond2
);
15247 gcc_assert (cond1
== cond2
);
15248 return CC_DLEUmode
;
15251 gcc_assert (cond1
== cond2
);
15252 return CC_DGEUmode
;
15255 gcc_unreachable ();
15260 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
15262 /* All floating point compares return CCFP if it is an equality
15263 comparison, and CCFPE otherwise. */
15264 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
15287 gcc_unreachable ();
15291 /* A compare with a shifted operand. Because of canonicalization, the
15292 comparison will have to be swapped when we emit the assembler. */
15293 if (GET_MODE (y
) == SImode
15294 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15295 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15296 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15297 || GET_CODE (x
) == ROTATERT
))
15300 /* This operation is performed swapped, but since we only rely on the Z
15301 flag we don't need an additional mode. */
15302 if (GET_MODE (y
) == SImode
15303 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15304 && GET_CODE (x
) == NEG
15305 && (op
== EQ
|| op
== NE
))
15308 /* This is a special case that is used by combine to allow a
15309 comparison of a shifted byte load to be split into a zero-extend
15310 followed by a comparison of the shifted integer (only valid for
15311 equalities and unsigned inequalities). */
15312 if (GET_MODE (x
) == SImode
15313 && GET_CODE (x
) == ASHIFT
15314 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15315 && GET_CODE (XEXP (x
, 0)) == SUBREG
15316 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15317 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15318 && (op
== EQ
|| op
== NE
15319 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15320 && CONST_INT_P (y
))
15323 /* A construct for a conditional compare, if the false arm contains
15324 0, then both conditions must be true, otherwise either condition
15325 must be true. Not all conditions are possible, so CCmode is
15326 returned if it can't be done. */
15327 if (GET_CODE (x
) == IF_THEN_ELSE
15328 && (XEXP (x
, 2) == const0_rtx
15329 || XEXP (x
, 2) == const1_rtx
)
15330 && COMPARISON_P (XEXP (x
, 0))
15331 && COMPARISON_P (XEXP (x
, 1)))
15332 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15333 INTVAL (XEXP (x
, 2)));
15335 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15336 if (GET_CODE (x
) == AND
15337 && (op
== EQ
|| op
== NE
)
15338 && COMPARISON_P (XEXP (x
, 0))
15339 && COMPARISON_P (XEXP (x
, 1)))
15340 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15343 if (GET_CODE (x
) == IOR
15344 && (op
== EQ
|| op
== NE
)
15345 && COMPARISON_P (XEXP (x
, 0))
15346 && COMPARISON_P (XEXP (x
, 1)))
15347 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15350 /* An operation (on Thumb) where we want to test for a single bit.
15351 This is done by shifting that bit up into the top bit of a
15352 scratch register; we can then branch on the sign bit. */
15354 && GET_MODE (x
) == SImode
15355 && (op
== EQ
|| op
== NE
)
15356 && GET_CODE (x
) == ZERO_EXTRACT
15357 && XEXP (x
, 1) == const1_rtx
)
15360 /* An operation that sets the condition codes as a side-effect, the
15361 V flag is not set correctly, so we can only use comparisons where
15362 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15364 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15365 if (GET_MODE (x
) == SImode
15367 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15368 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15369 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15370 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15371 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15372 || GET_CODE (x
) == LSHIFTRT
15373 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15374 || GET_CODE (x
) == ROTATERT
15375 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15376 return CC_NOOVmode
;
15378 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15381 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15382 && GET_CODE (x
) == PLUS
15383 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15386 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15392 /* A DImode comparison against zero can be implemented by
15393 or'ing the two halves together. */
15394 if (y
== const0_rtx
)
15397 /* We can do an equality test in three Thumb instructions. */
15407 /* DImode unsigned comparisons can be implemented by cmp +
15408 cmpeq without a scratch register. Not worth doing in
15419 /* DImode signed and unsigned comparisons can be implemented
15420 by cmp + sbcs with a scratch register, but that does not
15421 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15422 gcc_assert (op
!= EQ
&& op
!= NE
);
15426 gcc_unreachable ();
15430 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15431 return GET_MODE (x
);
15436 /* X and Y are two things to compare using CODE. Emit the compare insn and
15437 return the rtx for register 0 in the proper mode. FP means this is a
15438 floating point compare: I don't think that it is needed on the arm. */
15440 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15444 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
15446 /* We might have X as a constant, Y as a register because of the predicates
15447 used for cmpdi. If so, force X to a register here. */
15448 if (dimode_comparison
&& !REG_P (x
))
15449 x
= force_reg (DImode
, x
);
15451 mode
= SELECT_CC_MODE (code
, x
, y
);
15452 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15454 if (dimode_comparison
15455 && mode
!= CC_CZmode
)
15459 /* To compare two non-zero values for equality, XOR them and
15460 then compare against zero. Not used for ARM mode; there
15461 CC_CZmode is cheaper. */
15462 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
15464 gcc_assert (!reload_completed
);
15465 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
15469 /* A scratch register is required. */
15470 if (reload_completed
)
15471 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
15473 scratch
= gen_rtx_SCRATCH (SImode
);
15475 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15476 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15477 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
15480 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15485 /* Generate a sequence of insns that will generate the correct return
15486 address mask depending on the physical architecture that the program
15489 arm_gen_return_addr_mask (void)
15491 rtx reg
= gen_reg_rtx (Pmode
);
15493 emit_insn (gen_return_addr_mask (reg
));
15498 arm_reload_in_hi (rtx
*operands
)
15500 rtx ref
= operands
[1];
15502 HOST_WIDE_INT offset
= 0;
15504 if (GET_CODE (ref
) == SUBREG
)
15506 offset
= SUBREG_BYTE (ref
);
15507 ref
= SUBREG_REG (ref
);
15512 /* We have a pseudo which has been spilt onto the stack; there
15513 are two cases here: the first where there is a simple
15514 stack-slot replacement and a second where the stack-slot is
15515 out of range, or is used as a subreg. */
15516 if (reg_equiv_mem (REGNO (ref
)))
15518 ref
= reg_equiv_mem (REGNO (ref
));
15519 base
= find_replacement (&XEXP (ref
, 0));
15522 /* The slot is out of range, or was dressed up in a SUBREG. */
15523 base
= reg_equiv_address (REGNO (ref
));
15525 /* PR 62554: If there is no equivalent memory location then just move
15526 the value as an SImode register move. This happens when the target
15527 architecture variant does not have an HImode register move. */
15530 gcc_assert (REG_P (operands
[0]));
15531 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15532 gen_rtx_SUBREG (SImode
, ref
, 0)));
15537 base
= find_replacement (&XEXP (ref
, 0));
15539 /* Handle the case where the address is too complex to be offset by 1. */
15540 if (GET_CODE (base
) == MINUS
15541 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15543 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15545 emit_set_insn (base_plus
, base
);
15548 else if (GET_CODE (base
) == PLUS
)
15550 /* The addend must be CONST_INT, or we would have dealt with it above. */
15551 HOST_WIDE_INT hi
, lo
;
15553 offset
+= INTVAL (XEXP (base
, 1));
15554 base
= XEXP (base
, 0);
15556 /* Rework the address into a legal sequence of insns. */
15557 /* Valid range for lo is -4095 -> 4095 */
15560 : -((-offset
) & 0xfff));
15562 /* Corner case, if lo is the max offset then we would be out of range
15563 once we have added the additional 1 below, so bump the msb into the
15564 pre-loading insn(s). */
15568 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15569 ^ (HOST_WIDE_INT
) 0x80000000)
15570 - (HOST_WIDE_INT
) 0x80000000);
15572 gcc_assert (hi
+ lo
== offset
);
15576 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15578 /* Get the base address; addsi3 knows how to handle constants
15579 that require more than one insn. */
15580 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15586 /* Operands[2] may overlap operands[0] (though it won't overlap
15587 operands[1]), that's why we asked for a DImode reg -- so we can
15588 use the bit that does not overlap. */
15589 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15590 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15592 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15594 emit_insn (gen_zero_extendqisi2 (scratch
,
15595 gen_rtx_MEM (QImode
,
15596 plus_constant (Pmode
, base
,
15598 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15599 gen_rtx_MEM (QImode
,
15600 plus_constant (Pmode
, base
,
15602 if (!BYTES_BIG_ENDIAN
)
15603 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15604 gen_rtx_IOR (SImode
,
15607 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15611 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15612 gen_rtx_IOR (SImode
,
15613 gen_rtx_ASHIFT (SImode
, scratch
,
15615 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15618 /* Handle storing a half-word to memory during reload by synthesizing as two
15619 byte stores. Take care not to clobber the input values until after we
15620 have moved them somewhere safe. This code assumes that if the DImode
15621 scratch in operands[2] overlaps either the input value or output address
15622 in some way, then that value must die in this insn (we absolutely need
15623 two scratch registers for some corner cases). */
15625 arm_reload_out_hi (rtx
*operands
)
15627 rtx ref
= operands
[0];
15628 rtx outval
= operands
[1];
15630 HOST_WIDE_INT offset
= 0;
15632 if (GET_CODE (ref
) == SUBREG
)
15634 offset
= SUBREG_BYTE (ref
);
15635 ref
= SUBREG_REG (ref
);
15640 /* We have a pseudo which has been spilt onto the stack; there
15641 are two cases here: the first where there is a simple
15642 stack-slot replacement and a second where the stack-slot is
15643 out of range, or is used as a subreg. */
15644 if (reg_equiv_mem (REGNO (ref
)))
15646 ref
= reg_equiv_mem (REGNO (ref
));
15647 base
= find_replacement (&XEXP (ref
, 0));
15650 /* The slot is out of range, or was dressed up in a SUBREG. */
15651 base
= reg_equiv_address (REGNO (ref
));
15653 /* PR 62254: If there is no equivalent memory location then just move
15654 the value as an SImode register move. This happens when the target
15655 architecture variant does not have an HImode register move. */
15658 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
15660 if (REG_P (outval
))
15662 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15663 gen_rtx_SUBREG (SImode
, outval
, 0)));
15665 else /* SUBREG_P (outval) */
15667 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
15668 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15669 SUBREG_REG (outval
)));
15671 /* FIXME: Handle other cases ? */
15672 gcc_unreachable ();
15678 base
= find_replacement (&XEXP (ref
, 0));
15680 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15682 /* Handle the case where the address is too complex to be offset by 1. */
15683 if (GET_CODE (base
) == MINUS
15684 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15686 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15688 /* Be careful not to destroy OUTVAL. */
15689 if (reg_overlap_mentioned_p (base_plus
, outval
))
15691 /* Updating base_plus might destroy outval, see if we can
15692 swap the scratch and base_plus. */
15693 if (!reg_overlap_mentioned_p (scratch
, outval
))
15694 std::swap (scratch
, base_plus
);
15697 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15699 /* Be conservative and copy OUTVAL into the scratch now,
15700 this should only be necessary if outval is a subreg
15701 of something larger than a word. */
15702 /* XXX Might this clobber base? I can't see how it can,
15703 since scratch is known to overlap with OUTVAL, and
15704 must be wider than a word. */
15705 emit_insn (gen_movhi (scratch_hi
, outval
));
15706 outval
= scratch_hi
;
15710 emit_set_insn (base_plus
, base
);
15713 else if (GET_CODE (base
) == PLUS
)
15715 /* The addend must be CONST_INT, or we would have dealt with it above. */
15716 HOST_WIDE_INT hi
, lo
;
15718 offset
+= INTVAL (XEXP (base
, 1));
15719 base
= XEXP (base
, 0);
15721 /* Rework the address into a legal sequence of insns. */
15722 /* Valid range for lo is -4095 -> 4095 */
15725 : -((-offset
) & 0xfff));
15727 /* Corner case, if lo is the max offset then we would be out of range
15728 once we have added the additional 1 below, so bump the msb into the
15729 pre-loading insn(s). */
15733 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15734 ^ (HOST_WIDE_INT
) 0x80000000)
15735 - (HOST_WIDE_INT
) 0x80000000);
15737 gcc_assert (hi
+ lo
== offset
);
15741 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15743 /* Be careful not to destroy OUTVAL. */
15744 if (reg_overlap_mentioned_p (base_plus
, outval
))
15746 /* Updating base_plus might destroy outval, see if we
15747 can swap the scratch and base_plus. */
15748 if (!reg_overlap_mentioned_p (scratch
, outval
))
15749 std::swap (scratch
, base_plus
);
15752 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15754 /* Be conservative and copy outval into scratch now,
15755 this should only be necessary if outval is a
15756 subreg of something larger than a word. */
15757 /* XXX Might this clobber base? I can't see how it
15758 can, since scratch is known to overlap with
15760 emit_insn (gen_movhi (scratch_hi
, outval
));
15761 outval
= scratch_hi
;
15765 /* Get the base address; addsi3 knows how to handle constants
15766 that require more than one insn. */
15767 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15773 if (BYTES_BIG_ENDIAN
)
15775 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15776 plus_constant (Pmode
, base
,
15778 gen_lowpart (QImode
, outval
)));
15779 emit_insn (gen_lshrsi3 (scratch
,
15780 gen_rtx_SUBREG (SImode
, outval
, 0),
15782 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15784 gen_lowpart (QImode
, scratch
)));
15788 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15790 gen_lowpart (QImode
, outval
)));
15791 emit_insn (gen_lshrsi3 (scratch
,
15792 gen_rtx_SUBREG (SImode
, outval
, 0),
15794 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15795 plus_constant (Pmode
, base
,
15797 gen_lowpart (QImode
, scratch
)));
15801 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15802 (padded to the size of a word) should be passed in a register. */
15805 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15807 if (TARGET_AAPCS_BASED
)
15808 return must_pass_in_stack_var_size (mode
, type
);
15810 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15814 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15815 Return true if an argument passed on the stack should be padded upwards,
15816 i.e. if the least-significant byte has useful data.
15817 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15818 aggregate types are placed in the lowest memory address. */
15821 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15823 if (!TARGET_AAPCS_BASED
)
15824 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15826 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15833 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15834 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15835 register has useful data, and return the opposite if the most
15836 significant byte does. */
15839 arm_pad_reg_upward (machine_mode mode
,
15840 tree type
, int first ATTRIBUTE_UNUSED
)
15842 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15844 /* For AAPCS, small aggregates, small fixed-point types,
15845 and small complex types are always padded upwards. */
15848 if ((AGGREGATE_TYPE_P (type
)
15849 || TREE_CODE (type
) == COMPLEX_TYPE
15850 || FIXED_POINT_TYPE_P (type
))
15851 && int_size_in_bytes (type
) <= 4)
15856 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15857 && GET_MODE_SIZE (mode
) <= 4)
15862 /* Otherwise, use default padding. */
15863 return !BYTES_BIG_ENDIAN
;
15866 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15867 assuming that the address in the base register is word aligned. */
15869 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15871 HOST_WIDE_INT max_offset
;
15873 /* Offset must be a multiple of 4 in Thumb mode. */
15874 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15879 else if (TARGET_ARM
)
15884 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15887 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15888 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15889 Assumes that the address in the base register RN is word aligned. Pattern
15890 guarantees that both memory accesses use the same base register,
15891 the offsets are constants within the range, and the gap between the offsets is 4.
15892 If preload complete then check that registers are legal. WBACK indicates whether
15893 address is updated. LOAD indicates whether memory access is load or store. */
15895 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15896 bool wback
, bool load
)
15898 unsigned int t
, t2
, n
;
15900 if (!reload_completed
)
15903 if (!offset_ok_for_ldrd_strd (offset
))
15910 if ((TARGET_THUMB2
)
15911 && ((wback
&& (n
== t
|| n
== t2
))
15912 || (t
== SP_REGNUM
)
15913 || (t
== PC_REGNUM
)
15914 || (t2
== SP_REGNUM
)
15915 || (t2
== PC_REGNUM
)
15916 || (!load
&& (n
== PC_REGNUM
))
15917 || (load
&& (t
== t2
))
15918 /* Triggers Cortex-M3 LDRD errata. */
15919 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15923 && ((wback
&& (n
== t
|| n
== t2
))
15924 || (t2
== PC_REGNUM
)
15925 || (t
% 2 != 0) /* First destination register is not even. */
15927 /* PC can be used as base register (for offset addressing only),
15928 but it is depricated. */
15929 || (n
== PC_REGNUM
)))
15935 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15936 operand MEM's address contains an immediate offset from the base
15937 register and has no side effects, in which case it sets BASE and
15938 OFFSET accordingly. */
15940 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15944 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15946 /* TODO: Handle more general memory operand patterns, such as
15947 PRE_DEC and PRE_INC. */
15949 if (side_effects_p (mem
))
15952 /* Can't deal with subregs. */
15953 if (GET_CODE (mem
) == SUBREG
)
15956 gcc_assert (MEM_P (mem
));
15958 *offset
= const0_rtx
;
15960 addr
= XEXP (mem
, 0);
15962 /* If addr isn't valid for DImode, then we can't handle it. */
15963 if (!arm_legitimate_address_p (DImode
, addr
,
15964 reload_in_progress
|| reload_completed
))
15972 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15974 *base
= XEXP (addr
, 0);
15975 *offset
= XEXP (addr
, 1);
15976 return (REG_P (*base
) && CONST_INT_P (*offset
));
15982 /* Called from a peephole2 to replace two word-size accesses with a
15983 single LDRD/STRD instruction. Returns true iff we can generate a
15984 new instruction sequence. That is, both accesses use the same base
15985 register and the gap between constant offsets is 4. This function
15986 may reorder its operands to match ldrd/strd RTL templates.
15987 OPERANDS are the operands found by the peephole matcher;
15988 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15989 corresponding memory operands. LOAD indicaates whether the access
15990 is load or store. CONST_STORE indicates a store of constant
15991 integer values held in OPERANDS[4,5] and assumes that the pattern
15992 is of length 4 insn, for the purpose of checking dead registers.
15993 COMMUTE indicates that register operands may be reordered. */
15995 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15996 bool const_store
, bool commute
)
15999 HOST_WIDE_INT offsets
[2], offset
;
16000 rtx base
= NULL_RTX
;
16001 rtx cur_base
, cur_offset
, tmp
;
16003 HARD_REG_SET regset
;
16005 gcc_assert (!const_store
|| !load
);
16006 /* Check that the memory references are immediate offsets from the
16007 same base register. Extract the base register, the destination
16008 registers, and the corresponding memory offsets. */
16009 for (i
= 0; i
< nops
; i
++)
16011 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
16016 else if (REGNO (base
) != REGNO (cur_base
))
16019 offsets
[i
] = INTVAL (cur_offset
);
16020 if (GET_CODE (operands
[i
]) == SUBREG
)
16022 tmp
= SUBREG_REG (operands
[i
]);
16023 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
16028 /* Make sure there is no dependency between the individual loads. */
16029 if (load
&& REGNO (operands
[0]) == REGNO (base
))
16030 return false; /* RAW */
16032 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
16033 return false; /* WAW */
16035 /* If the same input register is used in both stores
16036 when storing different constants, try to find a free register.
16037 For example, the code
16042 can be transformed into
16046 in Thumb mode assuming that r1 is free.
16047 For ARM mode do the same but only if the starting register
16048 can be made to be even. */
16050 && REGNO (operands
[0]) == REGNO (operands
[1])
16051 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
16055 CLEAR_HARD_REG_SET (regset
);
16056 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16057 if (tmp
== NULL_RTX
)
16060 /* Use the new register in the first load to ensure that
16061 if the original input register is not dead after peephole,
16062 then it will have the correct constant value. */
16065 else if (TARGET_ARM
)
16067 int regno
= REGNO (operands
[0]);
16068 if (!peep2_reg_dead_p (4, operands
[0]))
16070 /* When the input register is even and is not dead after the
16071 pattern, it has to hold the second constant but we cannot
16072 form a legal STRD in ARM mode with this register as the second
16074 if (regno
% 2 == 0)
16077 /* Is regno-1 free? */
16078 SET_HARD_REG_SET (regset
);
16079 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
16080 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16081 if (tmp
== NULL_RTX
)
16088 /* Find a DImode register. */
16089 CLEAR_HARD_REG_SET (regset
);
16090 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
16091 if (tmp
!= NULL_RTX
)
16093 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
16094 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
16098 /* Can we use the input register to form a DI register? */
16099 SET_HARD_REG_SET (regset
);
16100 CLEAR_HARD_REG_BIT(regset
,
16101 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
16102 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
16103 if (tmp
== NULL_RTX
)
16105 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
16109 gcc_assert (operands
[0] != NULL_RTX
);
16110 gcc_assert (operands
[1] != NULL_RTX
);
16111 gcc_assert (REGNO (operands
[0]) % 2 == 0);
16112 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
16116 /* Make sure the instructions are ordered with lower memory access first. */
16117 if (offsets
[0] > offsets
[1])
16119 gap
= offsets
[0] - offsets
[1];
16120 offset
= offsets
[1];
16122 /* Swap the instructions such that lower memory is accessed first. */
16123 std::swap (operands
[0], operands
[1]);
16124 std::swap (operands
[2], operands
[3]);
16126 std::swap (operands
[4], operands
[5]);
16130 gap
= offsets
[1] - offsets
[0];
16131 offset
= offsets
[0];
16134 /* Make sure accesses are to consecutive memory locations. */
16138 /* Make sure we generate legal instructions. */
16139 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
16143 /* In Thumb state, where registers are almost unconstrained, there
16144 is little hope to fix it. */
16148 if (load
&& commute
)
16150 /* Try reordering registers. */
16151 std::swap (operands
[0], operands
[1]);
16152 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
16159 /* If input registers are dead after this pattern, they can be
16160 reordered or replaced by other registers that are free in the
16161 current pattern. */
16162 if (!peep2_reg_dead_p (4, operands
[0])
16163 || !peep2_reg_dead_p (4, operands
[1]))
16166 /* Try to reorder the input registers. */
16167 /* For example, the code
16172 can be transformed into
16177 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
16180 std::swap (operands
[0], operands
[1]);
16184 /* Try to find a free DI register. */
16185 CLEAR_HARD_REG_SET (regset
);
16186 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
16187 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
16190 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
16191 if (tmp
== NULL_RTX
)
16194 /* DREG must be an even-numbered register in DImode.
16195 Split it into SI registers. */
16196 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
16197 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
16198 gcc_assert (operands
[0] != NULL_RTX
);
16199 gcc_assert (operands
[1] != NULL_RTX
);
16200 gcc_assert (REGNO (operands
[0]) % 2 == 0);
16201 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
16203 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
16215 /* Print a symbolic form of X to the debug file, F. */
16217 arm_print_value (FILE *f
, rtx x
)
16219 switch (GET_CODE (x
))
16222 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
16226 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
16234 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
16236 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
16237 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
16245 fprintf (f
, "\"%s\"", XSTR (x
, 0));
16249 fprintf (f
, "`%s'", XSTR (x
, 0));
16253 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
16257 arm_print_value (f
, XEXP (x
, 0));
16261 arm_print_value (f
, XEXP (x
, 0));
16263 arm_print_value (f
, XEXP (x
, 1));
16271 fprintf (f
, "????");
16276 /* Routines for manipulation of the constant pool. */
16278 /* Arm instructions cannot load a large constant directly into a
16279 register; they have to come from a pc relative load. The constant
16280 must therefore be placed in the addressable range of the pc
16281 relative load. Depending on the precise pc relative load
16282 instruction the range is somewhere between 256 bytes and 4k. This
16283 means that we often have to dump a constant inside a function, and
16284 generate code to branch around it.
16286 It is important to minimize this, since the branches will slow
16287 things down and make the code larger.
16289 Normally we can hide the table after an existing unconditional
16290 branch so that there is no interruption of the flow, but in the
16291 worst case the code looks like this:
16309 We fix this by performing a scan after scheduling, which notices
16310 which instructions need to have their operands fetched from the
16311 constant table and builds the table.
16313 The algorithm starts by building a table of all the constants that
16314 need fixing up and all the natural barriers in the function (places
16315 where a constant table can be dropped without breaking the flow).
16316 For each fixup we note how far the pc-relative replacement will be
16317 able to reach and the offset of the instruction into the function.
16319 Having built the table we then group the fixes together to form
16320 tables that are as large as possible (subject to addressing
16321 constraints) and emit each table of constants after the last
16322 barrier that is within range of all the instructions in the group.
16323 If a group does not contain a barrier, then we forcibly create one
16324 by inserting a jump instruction into the flow. Once the table has
16325 been inserted, the insns are then modified to reference the
16326 relevant entry in the pool.
16328 Possible enhancements to the algorithm (not implemented) are:
16330 1) For some processors and object formats, there may be benefit in
16331 aligning the pools to the start of cache lines; this alignment
16332 would need to be taken into account when calculating addressability
16335 /* These typedefs are located at the start of this file, so that
16336 they can be used in the prototypes there. This comment is to
16337 remind readers of that fact so that the following structures
16338 can be understood more easily.
16340 typedef struct minipool_node Mnode;
16341 typedef struct minipool_fixup Mfix; */
16343 struct minipool_node
16345 /* Doubly linked chain of entries. */
16348 /* The maximum offset into the code that this entry can be placed. While
16349 pushing fixes for forward references, all entries are sorted in order
16350 of increasing max_address. */
16351 HOST_WIDE_INT max_address
;
16352 /* Similarly for an entry inserted for a backwards ref. */
16353 HOST_WIDE_INT min_address
;
16354 /* The number of fixes referencing this entry. This can become zero
16355 if we "unpush" an entry. In this case we ignore the entry when we
16356 come to emit the code. */
16358 /* The offset from the start of the minipool. */
16359 HOST_WIDE_INT offset
;
16360 /* The value in table. */
16362 /* The mode of value. */
16364 /* The size of the value. With iWMMXt enabled
16365 sizes > 4 also imply an alignment of 8-bytes. */
16369 struct minipool_fixup
16373 HOST_WIDE_INT address
;
16379 HOST_WIDE_INT forwards
;
16380 HOST_WIDE_INT backwards
;
16383 /* Fixes less than a word need padding out to a word boundary. */
16384 #define MINIPOOL_FIX_SIZE(mode) \
16385 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16387 static Mnode
* minipool_vector_head
;
16388 static Mnode
* minipool_vector_tail
;
16389 static rtx_code_label
*minipool_vector_label
;
16390 static int minipool_pad
;
16392 /* The linked list of all minipool fixes required for this function. */
16393 Mfix
* minipool_fix_head
;
16394 Mfix
* minipool_fix_tail
;
16395 /* The fix entry for the current minipool, once it has been placed. */
16396 Mfix
* minipool_barrier
;
16398 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16399 #define JUMP_TABLES_IN_TEXT_SECTION 0
16402 static HOST_WIDE_INT
16403 get_jump_table_size (rtx_jump_table_data
*insn
)
16405 /* ADDR_VECs only take room if read-only data does into the text
16407 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16409 rtx body
= PATTERN (insn
);
16410 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16411 HOST_WIDE_INT size
;
16412 HOST_WIDE_INT modesize
;
16414 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16415 size
= modesize
* XVECLEN (body
, elt
);
16419 /* Round up size of TBB table to a halfword boundary. */
16420 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
16423 /* No padding necessary for TBH. */
16426 /* Add two bytes for alignment on Thumb. */
16431 gcc_unreachable ();
16439 /* Return the maximum amount of padding that will be inserted before
16442 static HOST_WIDE_INT
16443 get_label_padding (rtx label
)
16445 HOST_WIDE_INT align
, min_insn_size
;
16447 align
= 1 << label_to_alignment (label
);
16448 min_insn_size
= TARGET_THUMB
? 2 : 4;
16449 return align
> min_insn_size
? align
- min_insn_size
: 0;
16452 /* Move a minipool fix MP from its current location to before MAX_MP.
16453 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16454 constraints may need updating. */
16456 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16457 HOST_WIDE_INT max_address
)
16459 /* The code below assumes these are different. */
16460 gcc_assert (mp
!= max_mp
);
16462 if (max_mp
== NULL
)
16464 if (max_address
< mp
->max_address
)
16465 mp
->max_address
= max_address
;
16469 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16470 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16472 mp
->max_address
= max_address
;
16474 /* Unlink MP from its current position. Since max_mp is non-null,
16475 mp->prev must be non-null. */
16476 mp
->prev
->next
= mp
->next
;
16477 if (mp
->next
!= NULL
)
16478 mp
->next
->prev
= mp
->prev
;
16480 minipool_vector_tail
= mp
->prev
;
16482 /* Re-insert it before MAX_MP. */
16484 mp
->prev
= max_mp
->prev
;
16487 if (mp
->prev
!= NULL
)
16488 mp
->prev
->next
= mp
;
16490 minipool_vector_head
= mp
;
16493 /* Save the new entry. */
16496 /* Scan over the preceding entries and adjust their addresses as
16498 while (mp
->prev
!= NULL
16499 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16501 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16508 /* Add a constant to the minipool for a forward reference. Returns the
16509 node added or NULL if the constant will not fit in this pool. */
16511 add_minipool_forward_ref (Mfix
*fix
)
16513 /* If set, max_mp is the first pool_entry that has a lower
16514 constraint than the one we are trying to add. */
16515 Mnode
* max_mp
= NULL
;
16516 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16519 /* If the minipool starts before the end of FIX->INSN then this FIX
16520 can not be placed into the current pool. Furthermore, adding the
16521 new constant pool entry may cause the pool to start FIX_SIZE bytes
16523 if (minipool_vector_head
&&
16524 (fix
->address
+ get_attr_length (fix
->insn
)
16525 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16528 /* Scan the pool to see if a constant with the same value has
16529 already been added. While we are doing this, also note the
16530 location where we must insert the constant if it doesn't already
16532 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16534 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16535 && fix
->mode
== mp
->mode
16536 && (!LABEL_P (fix
->value
)
16537 || (CODE_LABEL_NUMBER (fix
->value
)
16538 == CODE_LABEL_NUMBER (mp
->value
)))
16539 && rtx_equal_p (fix
->value
, mp
->value
))
16541 /* More than one fix references this entry. */
16543 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16546 /* Note the insertion point if necessary. */
16548 && mp
->max_address
> max_address
)
16551 /* If we are inserting an 8-bytes aligned quantity and
16552 we have not already found an insertion point, then
16553 make sure that all such 8-byte aligned quantities are
16554 placed at the start of the pool. */
16555 if (ARM_DOUBLEWORD_ALIGN
16557 && fix
->fix_size
>= 8
16558 && mp
->fix_size
< 8)
16561 max_address
= mp
->max_address
;
16565 /* The value is not currently in the minipool, so we need to create
16566 a new entry for it. If MAX_MP is NULL, the entry will be put on
16567 the end of the list since the placement is less constrained than
16568 any existing entry. Otherwise, we insert the new fix before
16569 MAX_MP and, if necessary, adjust the constraints on the other
16572 mp
->fix_size
= fix
->fix_size
;
16573 mp
->mode
= fix
->mode
;
16574 mp
->value
= fix
->value
;
16576 /* Not yet required for a backwards ref. */
16577 mp
->min_address
= -65536;
16579 if (max_mp
== NULL
)
16581 mp
->max_address
= max_address
;
16583 mp
->prev
= minipool_vector_tail
;
16585 if (mp
->prev
== NULL
)
16587 minipool_vector_head
= mp
;
16588 minipool_vector_label
= gen_label_rtx ();
16591 mp
->prev
->next
= mp
;
16593 minipool_vector_tail
= mp
;
16597 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16598 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16600 mp
->max_address
= max_address
;
16603 mp
->prev
= max_mp
->prev
;
16605 if (mp
->prev
!= NULL
)
16606 mp
->prev
->next
= mp
;
16608 minipool_vector_head
= mp
;
16611 /* Save the new entry. */
16614 /* Scan over the preceding entries and adjust their addresses as
16616 while (mp
->prev
!= NULL
16617 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16619 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16627 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16628 HOST_WIDE_INT min_address
)
16630 HOST_WIDE_INT offset
;
16632 /* The code below assumes these are different. */
16633 gcc_assert (mp
!= min_mp
);
16635 if (min_mp
== NULL
)
16637 if (min_address
> mp
->min_address
)
16638 mp
->min_address
= min_address
;
16642 /* We will adjust this below if it is too loose. */
16643 mp
->min_address
= min_address
;
16645 /* Unlink MP from its current position. Since min_mp is non-null,
16646 mp->next must be non-null. */
16647 mp
->next
->prev
= mp
->prev
;
16648 if (mp
->prev
!= NULL
)
16649 mp
->prev
->next
= mp
->next
;
16651 minipool_vector_head
= mp
->next
;
16653 /* Reinsert it after MIN_MP. */
16655 mp
->next
= min_mp
->next
;
16657 if (mp
->next
!= NULL
)
16658 mp
->next
->prev
= mp
;
16660 minipool_vector_tail
= mp
;
16666 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16668 mp
->offset
= offset
;
16669 if (mp
->refcount
> 0)
16670 offset
+= mp
->fix_size
;
16672 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16673 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16679 /* Add a constant to the minipool for a backward reference. Returns the
16680 node added or NULL if the constant will not fit in this pool.
16682 Note that the code for insertion for a backwards reference can be
16683 somewhat confusing because the calculated offsets for each fix do
16684 not take into account the size of the pool (which is still under
16687 add_minipool_backward_ref (Mfix
*fix
)
16689 /* If set, min_mp is the last pool_entry that has a lower constraint
16690 than the one we are trying to add. */
16691 Mnode
*min_mp
= NULL
;
16692 /* This can be negative, since it is only a constraint. */
16693 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16696 /* If we can't reach the current pool from this insn, or if we can't
16697 insert this entry at the end of the pool without pushing other
16698 fixes out of range, then we don't try. This ensures that we
16699 can't fail later on. */
16700 if (min_address
>= minipool_barrier
->address
16701 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16702 >= minipool_barrier
->address
))
16705 /* Scan the pool to see if a constant with the same value has
16706 already been added. While we are doing this, also note the
16707 location where we must insert the constant if it doesn't already
16709 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16711 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16712 && fix
->mode
== mp
->mode
16713 && (!LABEL_P (fix
->value
)
16714 || (CODE_LABEL_NUMBER (fix
->value
)
16715 == CODE_LABEL_NUMBER (mp
->value
)))
16716 && rtx_equal_p (fix
->value
, mp
->value
)
16717 /* Check that there is enough slack to move this entry to the
16718 end of the table (this is conservative). */
16719 && (mp
->max_address
16720 > (minipool_barrier
->address
16721 + minipool_vector_tail
->offset
16722 + minipool_vector_tail
->fix_size
)))
16725 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16728 if (min_mp
!= NULL
)
16729 mp
->min_address
+= fix
->fix_size
;
16732 /* Note the insertion point if necessary. */
16733 if (mp
->min_address
< min_address
)
16735 /* For now, we do not allow the insertion of 8-byte alignment
16736 requiring nodes anywhere but at the start of the pool. */
16737 if (ARM_DOUBLEWORD_ALIGN
16738 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16743 else if (mp
->max_address
16744 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16746 /* Inserting before this entry would push the fix beyond
16747 its maximum address (which can happen if we have
16748 re-located a forwards fix); force the new fix to come
16750 if (ARM_DOUBLEWORD_ALIGN
16751 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16756 min_address
= mp
->min_address
+ fix
->fix_size
;
16759 /* Do not insert a non-8-byte aligned quantity before 8-byte
16760 aligned quantities. */
16761 else if (ARM_DOUBLEWORD_ALIGN
16762 && fix
->fix_size
< 8
16763 && mp
->fix_size
>= 8)
16766 min_address
= mp
->min_address
+ fix
->fix_size
;
16771 /* We need to create a new entry. */
16773 mp
->fix_size
= fix
->fix_size
;
16774 mp
->mode
= fix
->mode
;
16775 mp
->value
= fix
->value
;
16777 mp
->max_address
= minipool_barrier
->address
+ 65536;
16779 mp
->min_address
= min_address
;
16781 if (min_mp
== NULL
)
16784 mp
->next
= minipool_vector_head
;
16786 if (mp
->next
== NULL
)
16788 minipool_vector_tail
= mp
;
16789 minipool_vector_label
= gen_label_rtx ();
16792 mp
->next
->prev
= mp
;
16794 minipool_vector_head
= mp
;
16798 mp
->next
= min_mp
->next
;
16802 if (mp
->next
!= NULL
)
16803 mp
->next
->prev
= mp
;
16805 minipool_vector_tail
= mp
;
16808 /* Save the new entry. */
16816 /* Scan over the following entries and adjust their offsets. */
16817 while (mp
->next
!= NULL
)
16819 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16820 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16823 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16825 mp
->next
->offset
= mp
->offset
;
16834 assign_minipool_offsets (Mfix
*barrier
)
16836 HOST_WIDE_INT offset
= 0;
16839 minipool_barrier
= barrier
;
16841 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16843 mp
->offset
= offset
;
16845 if (mp
->refcount
> 0)
16846 offset
+= mp
->fix_size
;
16850 /* Output the literal table */
16852 dump_minipool (rtx_insn
*scan
)
16858 if (ARM_DOUBLEWORD_ALIGN
)
16859 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16860 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16867 fprintf (dump_file
,
16868 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16869 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16871 scan
= emit_label_after (gen_label_rtx (), scan
);
16872 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16873 scan
= emit_label_after (minipool_vector_label
, scan
);
16875 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16877 if (mp
->refcount
> 0)
16881 fprintf (dump_file
,
16882 ";; Offset %u, min %ld, max %ld ",
16883 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16884 (unsigned long) mp
->max_address
);
16885 arm_print_value (dump_file
, mp
->value
);
16886 fputc ('\n', dump_file
);
16889 switch (GET_MODE_SIZE (mp
->mode
))
16891 #ifdef HAVE_consttable_1
16893 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16897 #ifdef HAVE_consttable_2
16899 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16903 #ifdef HAVE_consttable_4
16905 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16909 #ifdef HAVE_consttable_8
16911 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16915 #ifdef HAVE_consttable_16
16917 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16922 gcc_unreachable ();
16930 minipool_vector_head
= minipool_vector_tail
= NULL
;
16931 scan
= emit_insn_after (gen_consttable_end (), scan
);
16932 scan
= emit_barrier_after (scan
);
16935 /* Return the cost of forcibly inserting a barrier after INSN. */
16937 arm_barrier_cost (rtx_insn
*insn
)
16939 /* Basing the location of the pool on the loop depth is preferable,
16940 but at the moment, the basic block information seems to be
16941 corrupt by this stage of the compilation. */
16942 int base_cost
= 50;
16943 rtx_insn
*next
= next_nonnote_insn (insn
);
16945 if (next
!= NULL
&& LABEL_P (next
))
16948 switch (GET_CODE (insn
))
16951 /* It will always be better to place the table before the label, rather
16960 return base_cost
- 10;
16963 return base_cost
+ 10;
16967 /* Find the best place in the insn stream in the range
16968 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16969 Create the barrier by inserting a jump and add a new fix entry for
16972 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16974 HOST_WIDE_INT count
= 0;
16975 rtx_barrier
*barrier
;
16976 rtx_insn
*from
= fix
->insn
;
16977 /* The instruction after which we will insert the jump. */
16978 rtx_insn
*selected
= NULL
;
16980 /* The address at which the jump instruction will be placed. */
16981 HOST_WIDE_INT selected_address
;
16983 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16984 rtx_code_label
*label
= gen_label_rtx ();
16986 selected_cost
= arm_barrier_cost (from
);
16987 selected_address
= fix
->address
;
16989 while (from
&& count
< max_count
)
16991 rtx_jump_table_data
*tmp
;
16994 /* This code shouldn't have been called if there was a natural barrier
16996 gcc_assert (!BARRIER_P (from
));
16998 /* Count the length of this insn. This must stay in sync with the
16999 code that pushes minipool fixes. */
17000 if (LABEL_P (from
))
17001 count
+= get_label_padding (from
);
17003 count
+= get_attr_length (from
);
17005 /* If there is a jump table, add its length. */
17006 if (tablejump_p (from
, NULL
, &tmp
))
17008 count
+= get_jump_table_size (tmp
);
17010 /* Jump tables aren't in a basic block, so base the cost on
17011 the dispatch insn. If we select this location, we will
17012 still put the pool after the table. */
17013 new_cost
= arm_barrier_cost (from
);
17015 if (count
< max_count
17016 && (!selected
|| new_cost
<= selected_cost
))
17019 selected_cost
= new_cost
;
17020 selected_address
= fix
->address
+ count
;
17023 /* Continue after the dispatch table. */
17024 from
= NEXT_INSN (tmp
);
17028 new_cost
= arm_barrier_cost (from
);
17030 if (count
< max_count
17031 && (!selected
|| new_cost
<= selected_cost
))
17034 selected_cost
= new_cost
;
17035 selected_address
= fix
->address
+ count
;
17038 from
= NEXT_INSN (from
);
17041 /* Make sure that we found a place to insert the jump. */
17042 gcc_assert (selected
);
17044 /* Make sure we do not split a call and its corresponding
17045 CALL_ARG_LOCATION note. */
17046 if (CALL_P (selected
))
17048 rtx_insn
*next
= NEXT_INSN (selected
);
17049 if (next
&& NOTE_P (next
)
17050 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
17054 /* Create a new JUMP_INSN that branches around a barrier. */
17055 from
= emit_jump_insn_after (gen_jump (label
), selected
);
17056 JUMP_LABEL (from
) = label
;
17057 barrier
= emit_barrier_after (from
);
17058 emit_label_after (label
, barrier
);
17060 /* Create a minipool barrier entry for the new barrier. */
17061 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
17062 new_fix
->insn
= barrier
;
17063 new_fix
->address
= selected_address
;
17064 new_fix
->next
= fix
->next
;
17065 fix
->next
= new_fix
;
17070 /* Record that there is a natural barrier in the insn stream at
17073 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
17075 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
17078 fix
->address
= address
;
17081 if (minipool_fix_head
!= NULL
)
17082 minipool_fix_tail
->next
= fix
;
17084 minipool_fix_head
= fix
;
17086 minipool_fix_tail
= fix
;
17089 /* Record INSN, which will need fixing up to load a value from the
17090 minipool. ADDRESS is the offset of the insn since the start of the
17091 function; LOC is a pointer to the part of the insn which requires
17092 fixing; VALUE is the constant that must be loaded, which is of type
17095 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
17096 machine_mode mode
, rtx value
)
17098 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
17101 fix
->address
= address
;
17104 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
17105 fix
->value
= value
;
17106 fix
->forwards
= get_attr_pool_range (insn
);
17107 fix
->backwards
= get_attr_neg_pool_range (insn
);
17108 fix
->minipool
= NULL
;
17110 /* If an insn doesn't have a range defined for it, then it isn't
17111 expecting to be reworked by this code. Better to stop now than
17112 to generate duff assembly code. */
17113 gcc_assert (fix
->forwards
|| fix
->backwards
);
17115 /* If an entry requires 8-byte alignment then assume all constant pools
17116 require 4 bytes of padding. Trying to do this later on a per-pool
17117 basis is awkward because existing pool entries have to be modified. */
17118 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
17123 fprintf (dump_file
,
17124 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17125 GET_MODE_NAME (mode
),
17126 INSN_UID (insn
), (unsigned long) address
,
17127 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
17128 arm_print_value (dump_file
, fix
->value
);
17129 fprintf (dump_file
, "\n");
17132 /* Add it to the chain of fixes. */
17135 if (minipool_fix_head
!= NULL
)
17136 minipool_fix_tail
->next
= fix
;
17138 minipool_fix_head
= fix
;
17140 minipool_fix_tail
= fix
;
17143 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17144 Returns the number of insns needed, or 99 if we always want to synthesize
17147 arm_max_const_double_inline_cost ()
17149 /* Let the value get synthesized to avoid the use of literal pools. */
17150 if (arm_disable_literal_pool
)
17153 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
17156 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17157 Returns the number of insns needed, or 99 if we don't know how to
17160 arm_const_double_inline_cost (rtx val
)
17162 rtx lowpart
, highpart
;
17165 mode
= GET_MODE (val
);
17167 if (mode
== VOIDmode
)
17170 gcc_assert (GET_MODE_SIZE (mode
) == 8);
17172 lowpart
= gen_lowpart (SImode
, val
);
17173 highpart
= gen_highpart_mode (SImode
, mode
, val
);
17175 gcc_assert (CONST_INT_P (lowpart
));
17176 gcc_assert (CONST_INT_P (highpart
));
17178 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
17179 NULL_RTX
, NULL_RTX
, 0, 0)
17180 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
17181 NULL_RTX
, NULL_RTX
, 0, 0));
17184 /* Cost of loading a SImode constant. */
17186 arm_const_inline_cost (enum rtx_code code
, rtx val
)
17188 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
17189 NULL_RTX
, NULL_RTX
, 1, 0);
17192 /* Return true if it is worthwhile to split a 64-bit constant into two
17193 32-bit operations. This is the case if optimizing for size, or
17194 if we have load delay slots, or if one 32-bit part can be done with
17195 a single data operation. */
17197 arm_const_double_by_parts (rtx val
)
17199 machine_mode mode
= GET_MODE (val
);
17202 if (optimize_size
|| arm_ld_sched
)
17205 if (mode
== VOIDmode
)
17208 part
= gen_highpart_mode (SImode
, mode
, val
);
17210 gcc_assert (CONST_INT_P (part
));
17212 if (const_ok_for_arm (INTVAL (part
))
17213 || const_ok_for_arm (~INTVAL (part
)))
17216 part
= gen_lowpart (SImode
, val
);
17218 gcc_assert (CONST_INT_P (part
));
17220 if (const_ok_for_arm (INTVAL (part
))
17221 || const_ok_for_arm (~INTVAL (part
)))
17227 /* Return true if it is possible to inline both the high and low parts
17228 of a 64-bit constant into 32-bit data processing instructions. */
17230 arm_const_double_by_immediates (rtx val
)
17232 machine_mode mode
= GET_MODE (val
);
17235 if (mode
== VOIDmode
)
17238 part
= gen_highpart_mode (SImode
, mode
, val
);
17240 gcc_assert (CONST_INT_P (part
));
17242 if (!const_ok_for_arm (INTVAL (part
)))
17245 part
= gen_lowpart (SImode
, val
);
17247 gcc_assert (CONST_INT_P (part
));
17249 if (!const_ok_for_arm (INTVAL (part
)))
17255 /* Scan INSN and note any of its operands that need fixing.
17256 If DO_PUSHES is false we do not actually push any of the fixups
17259 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
17263 extract_constrain_insn (insn
);
17265 if (recog_data
.n_alternatives
== 0)
17268 /* Fill in recog_op_alt with information about the constraints of
17270 preprocess_constraints (insn
);
17272 const operand_alternative
*op_alt
= which_op_alt ();
17273 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
17275 /* Things we need to fix can only occur in inputs. */
17276 if (recog_data
.operand_type
[opno
] != OP_IN
)
17279 /* If this alternative is a memory reference, then any mention
17280 of constants in this alternative is really to fool reload
17281 into allowing us to accept one there. We need to fix them up
17282 now so that we output the right code. */
17283 if (op_alt
[opno
].memory_ok
)
17285 rtx op
= recog_data
.operand
[opno
];
17287 if (CONSTANT_P (op
))
17290 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
17291 recog_data
.operand_mode
[opno
], op
);
17293 else if (MEM_P (op
)
17294 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
17295 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
17299 rtx cop
= avoid_constant_pool_reference (op
);
17301 /* Casting the address of something to a mode narrower
17302 than a word can cause avoid_constant_pool_reference()
17303 to return the pool reference itself. That's no good to
17304 us here. Lets just hope that we can use the
17305 constant pool value directly. */
17307 cop
= get_pool_constant (XEXP (op
, 0));
17309 push_minipool_fix (insn
, address
,
17310 recog_data
.operand_loc
[opno
],
17311 recog_data
.operand_mode
[opno
], cop
);
17321 /* Rewrite move insn into subtract of 0 if the condition codes will
17322 be useful in next conditional jump insn. */
17325 thumb1_reorg (void)
17329 FOR_EACH_BB_FN (bb
, cfun
)
17332 rtx cmp
, op0
, op1
, set
= NULL
;
17333 rtx_insn
*prev
, *insn
= BB_END (bb
);
17334 bool insn_clobbered
= false;
17336 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17337 insn
= PREV_INSN (insn
);
17339 /* Find the last cbranchsi4_insn in basic block BB. */
17340 if (insn
== BB_HEAD (bb
)
17341 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17344 /* Get the register with which we are comparing. */
17345 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17346 op0
= XEXP (cmp
, 0);
17347 op1
= XEXP (cmp
, 1);
17349 /* Check that comparison is against ZERO. */
17350 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17353 /* Find the first flag setting insn before INSN in basic block BB. */
17354 gcc_assert (insn
!= BB_HEAD (bb
));
17355 for (prev
= PREV_INSN (insn
);
17357 && prev
!= BB_HEAD (bb
)
17359 || DEBUG_INSN_P (prev
)
17360 || ((set
= single_set (prev
)) != NULL
17361 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17362 prev
= PREV_INSN (prev
))
17364 if (reg_set_p (op0
, prev
))
17365 insn_clobbered
= true;
17368 /* Skip if op0 is clobbered by insn other than prev. */
17369 if (insn_clobbered
)
17375 dest
= SET_DEST (set
);
17376 src
= SET_SRC (set
);
17377 if (!low_register_operand (dest
, SImode
)
17378 || !low_register_operand (src
, SImode
))
17381 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17382 in INSN. Both src and dest of the move insn are checked. */
17383 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17385 dest
= copy_rtx (dest
);
17386 src
= copy_rtx (src
);
17387 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17388 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17389 INSN_CODE (prev
) = -1;
17390 /* Set test register in INSN to dest. */
17391 XEXP (cmp
, 0) = copy_rtx (dest
);
17392 INSN_CODE (insn
) = -1;
17397 /* Convert instructions to their cc-clobbering variant if possible, since
17398 that allows us to use smaller encodings. */
17401 thumb2_reorg (void)
17406 INIT_REG_SET (&live
);
17408 /* We are freeing block_for_insn in the toplev to keep compatibility
17409 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17410 compute_bb_for_insn ();
17413 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17415 FOR_EACH_BB_FN (bb
, cfun
)
17417 if ((current_tune
->disparage_flag_setting_t16_encodings
17418 == tune_params::DISPARAGE_FLAGS_ALL
)
17419 && optimize_bb_for_speed_p (bb
))
17423 Convert_Action action
= SKIP
;
17424 Convert_Action action_for_partial_flag_setting
17425 = ((current_tune
->disparage_flag_setting_t16_encodings
17426 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17427 && optimize_bb_for_speed_p (bb
))
17430 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17431 df_simulate_initialize_backwards (bb
, &live
);
17432 FOR_BB_INSNS_REVERSE (bb
, insn
)
17434 if (NONJUMP_INSN_P (insn
)
17435 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17436 && GET_CODE (PATTERN (insn
)) == SET
)
17439 rtx pat
= PATTERN (insn
);
17440 rtx dst
= XEXP (pat
, 0);
17441 rtx src
= XEXP (pat
, 1);
17442 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17444 if (UNARY_P (src
) || BINARY_P (src
))
17445 op0
= XEXP (src
, 0);
17447 if (BINARY_P (src
))
17448 op1
= XEXP (src
, 1);
17450 if (low_register_operand (dst
, SImode
))
17452 switch (GET_CODE (src
))
17455 /* Adding two registers and storing the result
17456 in the first source is already a 16-bit
17458 if (rtx_equal_p (dst
, op0
)
17459 && register_operand (op1
, SImode
))
17462 if (low_register_operand (op0
, SImode
))
17464 /* ADDS <Rd>,<Rn>,<Rm> */
17465 if (low_register_operand (op1
, SImode
))
17467 /* ADDS <Rdn>,#<imm8> */
17468 /* SUBS <Rdn>,#<imm8> */
17469 else if (rtx_equal_p (dst
, op0
)
17470 && CONST_INT_P (op1
)
17471 && IN_RANGE (INTVAL (op1
), -255, 255))
17473 /* ADDS <Rd>,<Rn>,#<imm3> */
17474 /* SUBS <Rd>,<Rn>,#<imm3> */
17475 else if (CONST_INT_P (op1
)
17476 && IN_RANGE (INTVAL (op1
), -7, 7))
17479 /* ADCS <Rd>, <Rn> */
17480 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17481 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17482 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17484 && COMPARISON_P (op1
)
17485 && cc_register (XEXP (op1
, 0), VOIDmode
)
17486 && maybe_get_arm_condition_code (op1
) == ARM_CS
17487 && XEXP (op1
, 1) == const0_rtx
)
17492 /* RSBS <Rd>,<Rn>,#0
17493 Not handled here: see NEG below. */
17494 /* SUBS <Rd>,<Rn>,#<imm3>
17496 Not handled here: see PLUS above. */
17497 /* SUBS <Rd>,<Rn>,<Rm> */
17498 if (low_register_operand (op0
, SImode
)
17499 && low_register_operand (op1
, SImode
))
17504 /* MULS <Rdm>,<Rn>,<Rdm>
17505 As an exception to the rule, this is only used
17506 when optimizing for size since MULS is slow on all
17507 known implementations. We do not even want to use
17508 MULS in cold code, if optimizing for speed, so we
17509 test the global flag here. */
17510 if (!optimize_size
)
17512 /* else fall through. */
17516 /* ANDS <Rdn>,<Rm> */
17517 if (rtx_equal_p (dst
, op0
)
17518 && low_register_operand (op1
, SImode
))
17519 action
= action_for_partial_flag_setting
;
17520 else if (rtx_equal_p (dst
, op1
)
17521 && low_register_operand (op0
, SImode
))
17522 action
= action_for_partial_flag_setting
== SKIP
17523 ? SKIP
: SWAP_CONV
;
17529 /* ASRS <Rdn>,<Rm> */
17530 /* LSRS <Rdn>,<Rm> */
17531 /* LSLS <Rdn>,<Rm> */
17532 if (rtx_equal_p (dst
, op0
)
17533 && low_register_operand (op1
, SImode
))
17534 action
= action_for_partial_flag_setting
;
17535 /* ASRS <Rd>,<Rm>,#<imm5> */
17536 /* LSRS <Rd>,<Rm>,#<imm5> */
17537 /* LSLS <Rd>,<Rm>,#<imm5> */
17538 else if (low_register_operand (op0
, SImode
)
17539 && CONST_INT_P (op1
)
17540 && IN_RANGE (INTVAL (op1
), 0, 31))
17541 action
= action_for_partial_flag_setting
;
17545 /* RORS <Rdn>,<Rm> */
17546 if (rtx_equal_p (dst
, op0
)
17547 && low_register_operand (op1
, SImode
))
17548 action
= action_for_partial_flag_setting
;
17552 /* MVNS <Rd>,<Rm> */
17553 if (low_register_operand (op0
, SImode
))
17554 action
= action_for_partial_flag_setting
;
17558 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17559 if (low_register_operand (op0
, SImode
))
17564 /* MOVS <Rd>,#<imm8> */
17565 if (CONST_INT_P (src
)
17566 && IN_RANGE (INTVAL (src
), 0, 255))
17567 action
= action_for_partial_flag_setting
;
17571 /* MOVS and MOV<c> with registers have different
17572 encodings, so are not relevant here. */
17580 if (action
!= SKIP
)
17582 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17583 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17586 if (action
== SWAP_CONV
)
17588 src
= copy_rtx (src
);
17589 XEXP (src
, 0) = op1
;
17590 XEXP (src
, 1) = op0
;
17591 pat
= gen_rtx_SET (dst
, src
);
17592 vec
= gen_rtvec (2, pat
, clobber
);
17594 else /* action == CONV */
17595 vec
= gen_rtvec (2, pat
, clobber
);
17597 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17598 INSN_CODE (insn
) = -1;
17602 if (NONDEBUG_INSN_P (insn
))
17603 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17607 CLEAR_REG_SET (&live
);
17610 /* Gcc puts the pool in the wrong place for ARM, since we can only
17611 load addresses a limited distance around the pc. We do some
17612 special munging to move the constant pool values to the correct
17613 point in the code. */
17618 HOST_WIDE_INT address
= 0;
17623 else if (TARGET_THUMB2
)
17626 /* Ensure all insns that must be split have been split at this point.
17627 Otherwise, the pool placement code below may compute incorrect
17628 insn lengths. Note that when optimizing, all insns have already
17629 been split at this point. */
17631 split_all_insns_noflow ();
17633 minipool_fix_head
= minipool_fix_tail
= NULL
;
17635 /* The first insn must always be a note, or the code below won't
17636 scan it properly. */
17637 insn
= get_insns ();
17638 gcc_assert (NOTE_P (insn
));
17641 /* Scan all the insns and record the operands that will need fixing. */
17642 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17644 if (BARRIER_P (insn
))
17645 push_minipool_barrier (insn
, address
);
17646 else if (INSN_P (insn
))
17648 rtx_jump_table_data
*table
;
17650 note_invalid_constants (insn
, address
, true);
17651 address
+= get_attr_length (insn
);
17653 /* If the insn is a vector jump, add the size of the table
17654 and skip the table. */
17655 if (tablejump_p (insn
, NULL
, &table
))
17657 address
+= get_jump_table_size (table
);
17661 else if (LABEL_P (insn
))
17662 /* Add the worst-case padding due to alignment. We don't add
17663 the _current_ padding because the minipool insertions
17664 themselves might change it. */
17665 address
+= get_label_padding (insn
);
17668 fix
= minipool_fix_head
;
17670 /* Now scan the fixups and perform the required changes. */
17675 Mfix
* last_added_fix
;
17676 Mfix
* last_barrier
= NULL
;
17679 /* Skip any further barriers before the next fix. */
17680 while (fix
&& BARRIER_P (fix
->insn
))
17683 /* No more fixes. */
17687 last_added_fix
= NULL
;
17689 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17691 if (BARRIER_P (ftmp
->insn
))
17693 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17696 last_barrier
= ftmp
;
17698 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17701 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17704 /* If we found a barrier, drop back to that; any fixes that we
17705 could have reached but come after the barrier will now go in
17706 the next mini-pool. */
17707 if (last_barrier
!= NULL
)
17709 /* Reduce the refcount for those fixes that won't go into this
17711 for (fdel
= last_barrier
->next
;
17712 fdel
&& fdel
!= ftmp
;
17715 fdel
->minipool
->refcount
--;
17716 fdel
->minipool
= NULL
;
17719 ftmp
= last_barrier
;
17723 /* ftmp is first fix that we can't fit into this pool and
17724 there no natural barriers that we could use. Insert a
17725 new barrier in the code somewhere between the previous
17726 fix and this one, and arrange to jump around it. */
17727 HOST_WIDE_INT max_address
;
17729 /* The last item on the list of fixes must be a barrier, so
17730 we can never run off the end of the list of fixes without
17731 last_barrier being set. */
17734 max_address
= minipool_vector_head
->max_address
;
17735 /* Check that there isn't another fix that is in range that
17736 we couldn't fit into this pool because the pool was
17737 already too large: we need to put the pool before such an
17738 instruction. The pool itself may come just after the
17739 fix because create_fix_barrier also allows space for a
17740 jump instruction. */
17741 if (ftmp
->address
< max_address
)
17742 max_address
= ftmp
->address
+ 1;
17744 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17747 assign_minipool_offsets (last_barrier
);
17751 if (!BARRIER_P (ftmp
->insn
)
17752 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17759 /* Scan over the fixes we have identified for this pool, fixing them
17760 up and adding the constants to the pool itself. */
17761 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17762 this_fix
= this_fix
->next
)
17763 if (!BARRIER_P (this_fix
->insn
))
17766 = plus_constant (Pmode
,
17767 gen_rtx_LABEL_REF (VOIDmode
,
17768 minipool_vector_label
),
17769 this_fix
->minipool
->offset
);
17770 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17773 dump_minipool (last_barrier
->insn
);
17777 /* From now on we must synthesize any constants that we can't handle
17778 directly. This can happen if the RTL gets split during final
17779 instruction generation. */
17780 cfun
->machine
->after_arm_reorg
= 1;
17782 /* Free the minipool memory. */
17783 obstack_free (&minipool_obstack
, minipool_startobj
);
17786 /* Routines to output assembly language. */
17788 /* Return string representation of passed in real value. */
17789 static const char *
17790 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17792 if (!fp_consts_inited
)
17795 gcc_assert (real_equal (r
, &value_fp0
));
17799 /* OPERANDS[0] is the entire list of insns that constitute pop,
17800 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17801 is in the list, UPDATE is true iff the list contains explicit
17802 update of base register. */
17804 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17810 const char *conditional
;
17811 int num_saves
= XVECLEN (operands
[0], 0);
17812 unsigned int regno
;
17813 unsigned int regno_base
= REGNO (operands
[1]);
17814 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
17817 offset
+= update
? 1 : 0;
17818 offset
+= return_pc
? 1 : 0;
17820 /* Is the base register in the list? */
17821 for (i
= offset
; i
< num_saves
; i
++)
17823 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17824 /* If SP is in the list, then the base register must be SP. */
17825 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17826 /* If base register is in the list, there must be no explicit update. */
17827 if (regno
== regno_base
)
17828 gcc_assert (!update
);
17831 conditional
= reverse
? "%?%D0" : "%?%d0";
17832 /* Can't use POP if returning from an interrupt. */
17833 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17834 sprintf (pattern
, "pop%s\t{", conditional
);
17837 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17838 It's just a convention, their semantics are identical. */
17839 if (regno_base
== SP_REGNUM
)
17840 sprintf (pattern
, "ldmfd%s\t", conditional
);
17842 sprintf (pattern
, "ldmia%s\t", conditional
);
17844 sprintf (pattern
, "ldm%s\t", conditional
);
17846 strcat (pattern
, reg_names
[regno_base
]);
17848 strcat (pattern
, "!, {");
17850 strcat (pattern
, ", {");
17853 /* Output the first destination register. */
17855 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17857 /* Output the rest of the destination registers. */
17858 for (i
= offset
+ 1; i
< num_saves
; i
++)
17860 strcat (pattern
, ", ");
17862 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17865 strcat (pattern
, "}");
17867 if (interrupt_p
&& return_pc
)
17868 strcat (pattern
, "^");
17870 output_asm_insn (pattern
, &cond
);
17874 /* Output the assembly for a store multiple. */
17877 vfp_output_vstmd (rtx
* operands
)
17883 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17884 ? XEXP (operands
[0], 0)
17885 : XEXP (XEXP (operands
[0], 0), 0);
17886 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17889 strcpy (pattern
, "vpush%?.64\t{%P1");
17891 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17893 p
= strlen (pattern
);
17895 gcc_assert (REG_P (operands
[1]));
17897 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17898 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17900 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17902 strcpy (&pattern
[p
], "}");
17904 output_asm_insn (pattern
, operands
);
17909 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17910 number of bytes pushed. */
17913 vfp_emit_fstmd (int base_reg
, int count
)
17920 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17921 register pairs are stored by a store multiple insn. We avoid this
17922 by pushing an extra pair. */
17923 if (count
== 2 && !arm_arch6
)
17925 if (base_reg
== LAST_VFP_REGNUM
- 3)
17930 /* FSTMD may not store more than 16 doubleword registers at once. Split
17931 larger stores into multiple parts (up to a maximum of two, in
17936 /* NOTE: base_reg is an internal register number, so each D register
17938 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17939 saved
+= vfp_emit_fstmd (base_reg
, 16);
17943 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17944 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17946 reg
= gen_rtx_REG (DFmode
, base_reg
);
17949 XVECEXP (par
, 0, 0)
17950 = gen_rtx_SET (gen_frame_mem
17952 gen_rtx_PRE_MODIFY (Pmode
,
17955 (Pmode
, stack_pointer_rtx
,
17958 gen_rtx_UNSPEC (BLKmode
,
17959 gen_rtvec (1, reg
),
17960 UNSPEC_PUSH_MULT
));
17962 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17963 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17964 RTX_FRAME_RELATED_P (tmp
) = 1;
17965 XVECEXP (dwarf
, 0, 0) = tmp
;
17967 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17968 RTX_FRAME_RELATED_P (tmp
) = 1;
17969 XVECEXP (dwarf
, 0, 1) = tmp
;
17971 for (i
= 1; i
< count
; i
++)
17973 reg
= gen_rtx_REG (DFmode
, base_reg
);
17975 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17977 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17978 plus_constant (Pmode
,
17982 RTX_FRAME_RELATED_P (tmp
) = 1;
17983 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17986 par
= emit_insn (par
);
17987 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17988 RTX_FRAME_RELATED_P (par
) = 1;
17993 /* Emit a call instruction with pattern PAT. ADDR is the address of
17994 the call target. */
17997 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
18001 insn
= emit_call_insn (pat
);
18003 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18004 If the call might use such an entry, add a use of the PIC register
18005 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18006 if (TARGET_VXWORKS_RTP
18009 && GET_CODE (addr
) == SYMBOL_REF
18010 && (SYMBOL_REF_DECL (addr
)
18011 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
18012 : !SYMBOL_REF_LOCAL_P (addr
)))
18014 require_pic_register ();
18015 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
18018 if (TARGET_AAPCS_BASED
)
18020 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18021 linker. We need to add an IP clobber to allow setting
18022 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18023 is not needed since it's a fixed register. */
18024 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
18025 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
18029 /* Output a 'call' insn. */
18031 output_call (rtx
*operands
)
18033 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
18035 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18036 if (REGNO (operands
[0]) == LR_REGNUM
)
18038 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
18039 output_asm_insn ("mov%?\t%0, %|lr", operands
);
18042 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
18044 if (TARGET_INTERWORK
|| arm_arch4t
)
18045 output_asm_insn ("bx%?\t%0", operands
);
18047 output_asm_insn ("mov%?\t%|pc, %0", operands
);
18052 /* Output a move from arm registers to arm registers of a long double
18053 OPERANDS[0] is the destination.
18054 OPERANDS[1] is the source. */
18056 output_mov_long_double_arm_from_arm (rtx
*operands
)
18058 /* We have to be careful here because the two might overlap. */
18059 int dest_start
= REGNO (operands
[0]);
18060 int src_start
= REGNO (operands
[1]);
18064 if (dest_start
< src_start
)
18066 for (i
= 0; i
< 3; i
++)
18068 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18069 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18070 output_asm_insn ("mov%?\t%0, %1", ops
);
18075 for (i
= 2; i
>= 0; i
--)
18077 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
18078 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
18079 output_asm_insn ("mov%?\t%0, %1", ops
);
18087 arm_emit_movpair (rtx dest
, rtx src
)
18091 /* If the src is an immediate, simplify it. */
18092 if (CONST_INT_P (src
))
18094 HOST_WIDE_INT val
= INTVAL (src
);
18095 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
18096 if ((val
>> 16) & 0x0000ffff)
18098 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
18100 GEN_INT ((val
>> 16) & 0x0000ffff));
18101 insn
= get_last_insn ();
18102 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
18106 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
18107 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
18108 insn
= get_last_insn ();
18109 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
18112 /* Output a move between double words. It must be REG<-MEM
18115 output_move_double (rtx
*operands
, bool emit
, int *count
)
18117 enum rtx_code code0
= GET_CODE (operands
[0]);
18118 enum rtx_code code1
= GET_CODE (operands
[1]);
18123 /* The only case when this might happen is when
18124 you are looking at the length of a DImode instruction
18125 that has an invalid constant in it. */
18126 if (code0
== REG
&& code1
!= MEM
)
18128 gcc_assert (!emit
);
18135 unsigned int reg0
= REGNO (operands
[0]);
18137 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
18139 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
18141 switch (GET_CODE (XEXP (operands
[1], 0)))
18148 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
18149 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
18151 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18156 gcc_assert (TARGET_LDRD
);
18158 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
18165 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
18167 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18175 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18177 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18182 gcc_assert (TARGET_LDRD
);
18184 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
18189 /* Autoicrement addressing modes should never have overlapping
18190 base and destination registers, and overlapping index registers
18191 are already prohibited, so this doesn't need to worry about
18193 otherops
[0] = operands
[0];
18194 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18195 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18197 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18199 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18201 /* Registers overlap so split out the increment. */
18204 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18205 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18212 /* Use a single insn if we can.
18213 FIXME: IWMMXT allows offsets larger than ldrd can
18214 handle, fix these up with a pair of ldr. */
18216 || !CONST_INT_P (otherops
[2])
18217 || (INTVAL (otherops
[2]) > -256
18218 && INTVAL (otherops
[2]) < 256))
18221 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18227 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18228 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18238 /* Use a single insn if we can.
18239 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18240 fix these up with a pair of ldr. */
18242 || !CONST_INT_P (otherops
[2])
18243 || (INTVAL (otherops
[2]) > -256
18244 && INTVAL (otherops
[2]) < 256))
18247 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18253 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18254 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18264 /* We might be able to use ldrd %0, %1 here. However the range is
18265 different to ldr/adr, and it is broken on some ARMv7-M
18266 implementations. */
18267 /* Use the second register of the pair to avoid problematic
18269 otherops
[1] = operands
[1];
18271 output_asm_insn ("adr%?\t%0, %1", otherops
);
18272 operands
[1] = otherops
[0];
18276 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18278 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18285 /* ??? This needs checking for thumb2. */
18287 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18288 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18290 otherops
[0] = operands
[0];
18291 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18292 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18294 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18296 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18298 switch ((int) INTVAL (otherops
[2]))
18302 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18308 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18314 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18318 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18319 operands
[1] = otherops
[0];
18321 && (REG_P (otherops
[2])
18323 || (CONST_INT_P (otherops
[2])
18324 && INTVAL (otherops
[2]) > -256
18325 && INTVAL (otherops
[2]) < 256)))
18327 if (reg_overlap_mentioned_p (operands
[0],
18330 /* Swap base and index registers over to
18331 avoid a conflict. */
18332 std::swap (otherops
[1], otherops
[2]);
18334 /* If both registers conflict, it will usually
18335 have been fixed by a splitter. */
18336 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18337 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18341 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18342 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18349 otherops
[0] = operands
[0];
18351 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18356 if (CONST_INT_P (otherops
[2]))
18360 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18361 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18363 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18369 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18375 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18382 return "ldrd%?\t%0, [%1]";
18384 return "ldmia%?\t%1, %M0";
18388 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18389 /* Take care of overlapping base/data reg. */
18390 if (reg_mentioned_p (operands
[0], operands
[1]))
18394 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18395 output_asm_insn ("ldr%?\t%0, %1", operands
);
18405 output_asm_insn ("ldr%?\t%0, %1", operands
);
18406 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18416 /* Constraints should ensure this. */
18417 gcc_assert (code0
== MEM
&& code1
== REG
);
18418 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18419 || (TARGET_ARM
&& TARGET_LDRD
));
18421 switch (GET_CODE (XEXP (operands
[0], 0)))
18427 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18429 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18434 gcc_assert (TARGET_LDRD
);
18436 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18443 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18445 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18453 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18455 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18460 gcc_assert (TARGET_LDRD
);
18462 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18467 otherops
[0] = operands
[1];
18468 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18469 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18471 /* IWMMXT allows offsets larger than ldrd can handle,
18472 fix these up with a pair of ldr. */
18474 && CONST_INT_P (otherops
[2])
18475 && (INTVAL(otherops
[2]) <= -256
18476 || INTVAL(otherops
[2]) >= 256))
18478 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18482 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18483 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18492 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18493 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18499 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18502 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18507 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18512 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18513 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18515 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18519 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18526 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18533 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18538 && (REG_P (otherops
[2])
18540 || (CONST_INT_P (otherops
[2])
18541 && INTVAL (otherops
[2]) > -256
18542 && INTVAL (otherops
[2]) < 256)))
18544 otherops
[0] = operands
[1];
18545 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18547 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18553 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18554 otherops
[1] = operands
[1];
18557 output_asm_insn ("str%?\t%1, %0", operands
);
18558 output_asm_insn ("str%?\t%H1, %0", otherops
);
18568 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18569 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18572 output_move_quad (rtx
*operands
)
18574 if (REG_P (operands
[0]))
18576 /* Load, or reg->reg move. */
18578 if (MEM_P (operands
[1]))
18580 switch (GET_CODE (XEXP (operands
[1], 0)))
18583 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18588 output_asm_insn ("adr%?\t%0, %1", operands
);
18589 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18593 gcc_unreachable ();
18601 gcc_assert (REG_P (operands
[1]));
18603 dest
= REGNO (operands
[0]);
18604 src
= REGNO (operands
[1]);
18606 /* This seems pretty dumb, but hopefully GCC won't try to do it
18609 for (i
= 0; i
< 4; i
++)
18611 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18612 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18613 output_asm_insn ("mov%?\t%0, %1", ops
);
18616 for (i
= 3; i
>= 0; i
--)
18618 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18619 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18620 output_asm_insn ("mov%?\t%0, %1", ops
);
18626 gcc_assert (MEM_P (operands
[0]));
18627 gcc_assert (REG_P (operands
[1]));
18628 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18630 switch (GET_CODE (XEXP (operands
[0], 0)))
18633 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18637 gcc_unreachable ();
18644 /* Output a VFP load or store instruction. */
18647 output_move_vfp (rtx
*operands
)
18649 rtx reg
, mem
, addr
, ops
[2];
18650 int load
= REG_P (operands
[0]);
18651 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18652 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18657 reg
= operands
[!load
];
18658 mem
= operands
[load
];
18660 mode
= GET_MODE (reg
);
18662 gcc_assert (REG_P (reg
));
18663 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18664 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
18669 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18670 gcc_assert (MEM_P (mem
));
18672 addr
= XEXP (mem
, 0);
18674 switch (GET_CODE (addr
))
18677 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18678 ops
[0] = XEXP (addr
, 0);
18683 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18684 ops
[0] = XEXP (addr
, 0);
18689 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18695 sprintf (buff
, templ
,
18696 load
? "ld" : "st",
18699 integer_p
? "\t%@ int" : "");
18700 output_asm_insn (buff
, ops
);
18705 /* Output a Neon double-word or quad-word load or store, or a load
18706 or store for larger structure modes.
18708 WARNING: The ordering of elements is weird in big-endian mode,
18709 because the EABI requires that vectors stored in memory appear
18710 as though they were stored by a VSTM, as required by the EABI.
18711 GCC RTL defines element ordering based on in-memory order.
18712 This can be different from the architectural ordering of elements
18713 within a NEON register. The intrinsics defined in arm_neon.h use the
18714 NEON register element ordering, not the GCC RTL element ordering.
18716 For example, the in-memory ordering of a big-endian a quadword
18717 vector with 16-bit elements when stored from register pair {d0,d1}
18718 will be (lowest address first, d0[N] is NEON register element N):
18720 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18722 When necessary, quadword registers (dN, dN+1) are moved to ARM
18723 registers from rN in the order:
18725 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18727 So that STM/LDM can be used on vectors in ARM registers, and the
18728 same memory layout will result as if VSTM/VLDM were used.
18730 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18731 possible, which allows use of appropriate alignment tags.
18732 Note that the choice of "64" is independent of the actual vector
18733 element size; this size simply ensures that the behavior is
18734 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18736 Due to limitations of those instructions, use of VST1.64/VLD1.64
18737 is not possible if:
18738 - the address contains PRE_DEC, or
18739 - the mode refers to more than 4 double-word registers
18741 In those cases, it would be possible to replace VSTM/VLDM by a
18742 sequence of instructions; this is not currently implemented since
18743 this is not certain to actually improve performance. */
18746 output_move_neon (rtx
*operands
)
18748 rtx reg
, mem
, addr
, ops
[2];
18749 int regno
, nregs
, load
= REG_P (operands
[0]);
18754 reg
= operands
[!load
];
18755 mem
= operands
[load
];
18757 mode
= GET_MODE (reg
);
18759 gcc_assert (REG_P (reg
));
18760 regno
= REGNO (reg
);
18761 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18762 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18763 || NEON_REGNO_OK_FOR_QUAD (regno
));
18764 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18765 || VALID_NEON_QREG_MODE (mode
)
18766 || VALID_NEON_STRUCT_MODE (mode
));
18767 gcc_assert (MEM_P (mem
));
18769 addr
= XEXP (mem
, 0);
18771 /* Strip off const from addresses like (const (plus (...))). */
18772 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18773 addr
= XEXP (addr
, 0);
18775 switch (GET_CODE (addr
))
18778 /* We have to use vldm / vstm for too-large modes. */
18781 templ
= "v%smia%%?\t%%0!, %%h1";
18782 ops
[0] = XEXP (addr
, 0);
18786 templ
= "v%s1.64\t%%h1, %%A0";
18793 /* We have to use vldm / vstm in this case, since there is no
18794 pre-decrement form of the vld1 / vst1 instructions. */
18795 templ
= "v%smdb%%?\t%%0!, %%h1";
18796 ops
[0] = XEXP (addr
, 0);
18801 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18802 gcc_unreachable ();
18805 /* We have to use vldm / vstm for too-large modes. */
18809 templ
= "v%smia%%?\t%%m0, %%h1";
18811 templ
= "v%s1.64\t%%h1, %%A0";
18817 /* Fall through. */
18823 for (i
= 0; i
< nregs
; i
++)
18825 /* We're only using DImode here because it's a convenient size. */
18826 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18827 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18828 if (reg_overlap_mentioned_p (ops
[0], mem
))
18830 gcc_assert (overlap
== -1);
18835 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18836 output_asm_insn (buff
, ops
);
18841 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18842 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18843 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18844 output_asm_insn (buff
, ops
);
18851 gcc_unreachable ();
18854 sprintf (buff
, templ
, load
? "ld" : "st");
18855 output_asm_insn (buff
, ops
);
18860 /* Compute and return the length of neon_mov<mode>, where <mode> is
18861 one of VSTRUCT modes: EI, OI, CI or XI. */
18863 arm_attr_length_move_neon (rtx_insn
*insn
)
18865 rtx reg
, mem
, addr
;
18869 extract_insn_cached (insn
);
18871 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18873 mode
= GET_MODE (recog_data
.operand
[0]);
18884 gcc_unreachable ();
18888 load
= REG_P (recog_data
.operand
[0]);
18889 reg
= recog_data
.operand
[!load
];
18890 mem
= recog_data
.operand
[load
];
18892 gcc_assert (MEM_P (mem
));
18894 mode
= GET_MODE (reg
);
18895 addr
= XEXP (mem
, 0);
18897 /* Strip off const from addresses like (const (plus (...))). */
18898 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18899 addr
= XEXP (addr
, 0);
18901 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18903 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18910 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18914 arm_address_offset_is_imm (rtx_insn
*insn
)
18918 extract_insn_cached (insn
);
18920 if (REG_P (recog_data
.operand
[0]))
18923 mem
= recog_data
.operand
[0];
18925 gcc_assert (MEM_P (mem
));
18927 addr
= XEXP (mem
, 0);
18930 || (GET_CODE (addr
) == PLUS
18931 && REG_P (XEXP (addr
, 0))
18932 && CONST_INT_P (XEXP (addr
, 1))))
18938 /* Output an ADD r, s, #n where n may be too big for one instruction.
18939 If adding zero to one register, output nothing. */
18941 output_add_immediate (rtx
*operands
)
18943 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18945 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18948 output_multi_immediate (operands
,
18949 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18952 output_multi_immediate (operands
,
18953 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18960 /* Output a multiple immediate operation.
18961 OPERANDS is the vector of operands referred to in the output patterns.
18962 INSTR1 is the output pattern to use for the first constant.
18963 INSTR2 is the output pattern to use for subsequent constants.
18964 IMMED_OP is the index of the constant slot in OPERANDS.
18965 N is the constant value. */
18966 static const char *
18967 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18968 int immed_op
, HOST_WIDE_INT n
)
18970 #if HOST_BITS_PER_WIDE_INT > 32
18976 /* Quick and easy output. */
18977 operands
[immed_op
] = const0_rtx
;
18978 output_asm_insn (instr1
, operands
);
18983 const char * instr
= instr1
;
18985 /* Note that n is never zero here (which would give no output). */
18986 for (i
= 0; i
< 32; i
+= 2)
18990 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18991 output_asm_insn (instr
, operands
);
19001 /* Return the name of a shifter operation. */
19002 static const char *
19003 arm_shift_nmem(enum rtx_code code
)
19008 return ARM_LSL_NAME
;
19024 /* Return the appropriate ARM instruction for the operation code.
19025 The returned result should not be overwritten. OP is the rtx of the
19026 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19029 arithmetic_instr (rtx op
, int shift_first_arg
)
19031 switch (GET_CODE (op
))
19037 return shift_first_arg
? "rsb" : "sub";
19052 return arm_shift_nmem(GET_CODE(op
));
19055 gcc_unreachable ();
19059 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19060 for the operation code. The returned result should not be overwritten.
19061 OP is the rtx code of the shift.
19062 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
19064 static const char *
19065 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
19068 enum rtx_code code
= GET_CODE (op
);
19073 if (!CONST_INT_P (XEXP (op
, 1)))
19075 output_operand_lossage ("invalid shift operand");
19080 *amountp
= 32 - INTVAL (XEXP (op
, 1));
19088 mnem
= arm_shift_nmem(code
);
19089 if (CONST_INT_P (XEXP (op
, 1)))
19091 *amountp
= INTVAL (XEXP (op
, 1));
19093 else if (REG_P (XEXP (op
, 1)))
19100 output_operand_lossage ("invalid shift operand");
19106 /* We never have to worry about the amount being other than a
19107 power of 2, since this case can never be reloaded from a reg. */
19108 if (!CONST_INT_P (XEXP (op
, 1)))
19110 output_operand_lossage ("invalid shift operand");
19114 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
19116 /* Amount must be a power of two. */
19117 if (*amountp
& (*amountp
- 1))
19119 output_operand_lossage ("invalid shift operand");
19123 *amountp
= exact_log2 (*amountp
);
19124 gcc_assert (IN_RANGE (*amountp
, 0, 31));
19125 return ARM_LSL_NAME
;
19128 output_operand_lossage ("invalid shift operand");
19132 /* This is not 100% correct, but follows from the desire to merge
19133 multiplication by a power of 2 with the recognizer for a
19134 shift. >=32 is not a valid shift for "lsl", so we must try and
19135 output a shift that produces the correct arithmetical result.
19136 Using lsr #32 is identical except for the fact that the carry bit
19137 is not set correctly if we set the flags; but we never use the
19138 carry bit from such an operation, so we can ignore that. */
19139 if (code
== ROTATERT
)
19140 /* Rotate is just modulo 32. */
19142 else if (*amountp
!= (*amountp
& 31))
19144 if (code
== ASHIFT
)
19149 /* Shifts of 0 are no-ops. */
19156 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19157 because /bin/as is horribly restrictive. The judgement about
19158 whether or not each character is 'printable' (and can be output as
19159 is) or not (and must be printed with an octal escape) must be made
19160 with reference to the *host* character set -- the situation is
19161 similar to that discussed in the comments above pp_c_char in
19162 c-pretty-print.c. */
19164 #define MAX_ASCII_LEN 51
19167 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
19170 int len_so_far
= 0;
19172 fputs ("\t.ascii\t\"", stream
);
19174 for (i
= 0; i
< len
; i
++)
19178 if (len_so_far
>= MAX_ASCII_LEN
)
19180 fputs ("\"\n\t.ascii\t\"", stream
);
19186 if (c
== '\\' || c
== '\"')
19188 putc ('\\', stream
);
19196 fprintf (stream
, "\\%03o", c
);
19201 fputs ("\"\n", stream
);
19204 /* Whether a register is callee saved or not. This is necessary because high
19205 registers are marked as caller saved when optimizing for size on Thumb-1
19206 targets despite being callee saved in order to avoid using them. */
19207 #define callee_saved_reg_p(reg) \
19208 (!call_used_regs[reg] \
19209 || (TARGET_THUMB1 && optimize_size \
19210 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19212 /* Compute the register save mask for registers 0 through 12
19213 inclusive. This code is used by arm_compute_save_reg_mask. */
19215 static unsigned long
19216 arm_compute_save_reg0_reg12_mask (void)
19218 unsigned long func_type
= arm_current_func_type ();
19219 unsigned long save_reg_mask
= 0;
19222 if (IS_INTERRUPT (func_type
))
19224 unsigned int max_reg
;
19225 /* Interrupt functions must not corrupt any registers,
19226 even call clobbered ones. If this is a leaf function
19227 we can just examine the registers used by the RTL, but
19228 otherwise we have to assume that whatever function is
19229 called might clobber anything, and so we have to save
19230 all the call-clobbered registers as well. */
19231 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19232 /* FIQ handlers have registers r8 - r12 banked, so
19233 we only need to check r0 - r7, Normal ISRs only
19234 bank r14 and r15, so we must check up to r12.
19235 r13 is the stack pointer which is always preserved,
19236 so we do not need to consider it here. */
19241 for (reg
= 0; reg
<= max_reg
; reg
++)
19242 if (df_regs_ever_live_p (reg
)
19243 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19244 save_reg_mask
|= (1 << reg
);
19246 /* Also save the pic base register if necessary. */
19248 && !TARGET_SINGLE_PIC_BASE
19249 && arm_pic_register
!= INVALID_REGNUM
19250 && crtl
->uses_pic_offset_table
)
19251 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19253 else if (IS_VOLATILE(func_type
))
19255 /* For noreturn functions we historically omitted register saves
19256 altogether. However this really messes up debugging. As a
19257 compromise save just the frame pointers. Combined with the link
19258 register saved elsewhere this should be sufficient to get
19260 if (frame_pointer_needed
)
19261 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19262 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19263 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19264 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19265 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19269 /* In the normal case we only need to save those registers
19270 which are call saved and which are used by this function. */
19271 for (reg
= 0; reg
<= 11; reg
++)
19272 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19273 save_reg_mask
|= (1 << reg
);
19275 /* Handle the frame pointer as a special case. */
19276 if (frame_pointer_needed
)
19277 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19279 /* If we aren't loading the PIC register,
19280 don't stack it even though it may be live. */
19282 && !TARGET_SINGLE_PIC_BASE
19283 && arm_pic_register
!= INVALID_REGNUM
19284 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19285 || crtl
->uses_pic_offset_table
))
19286 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19288 /* The prologue will copy SP into R0, so save it. */
19289 if (IS_STACKALIGN (func_type
))
19290 save_reg_mask
|= 1;
19293 /* Save registers so the exception handler can modify them. */
19294 if (crtl
->calls_eh_return
)
19300 reg
= EH_RETURN_DATA_REGNO (i
);
19301 if (reg
== INVALID_REGNUM
)
19303 save_reg_mask
|= 1 << reg
;
19307 return save_reg_mask
;
19310 /* Return true if r3 is live at the start of the function. */
19313 arm_r3_live_at_start_p (void)
19315 /* Just look at cfg info, which is still close enough to correct at this
19316 point. This gives false positives for broken functions that might use
19317 uninitialized data that happens to be allocated in r3, but who cares? */
19318 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19321 /* Compute the number of bytes used to store the static chain register on the
19322 stack, above the stack frame. We need to know this accurately to get the
19323 alignment of the rest of the stack frame correct. */
19326 arm_compute_static_chain_stack_bytes (void)
19328 /* See the defining assertion in arm_expand_prologue. */
19329 if (IS_NESTED (arm_current_func_type ())
19330 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19331 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19332 && !df_regs_ever_live_p (LR_REGNUM
)))
19333 && arm_r3_live_at_start_p ()
19334 && crtl
->args
.pretend_args_size
== 0)
19340 /* Compute a bit mask of which registers need to be
19341 saved on the stack for the current function.
19342 This is used by arm_get_frame_offsets, which may add extra registers. */
19344 static unsigned long
19345 arm_compute_save_reg_mask (void)
19347 unsigned int save_reg_mask
= 0;
19348 unsigned long func_type
= arm_current_func_type ();
19351 if (IS_NAKED (func_type
))
19352 /* This should never really happen. */
19355 /* If we are creating a stack frame, then we must save the frame pointer,
19356 IP (which will hold the old stack pointer), LR and the PC. */
19357 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19359 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19362 | (1 << PC_REGNUM
);
19364 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19366 /* Decide if we need to save the link register.
19367 Interrupt routines have their own banked link register,
19368 so they never need to save it.
19369 Otherwise if we do not use the link register we do not need to save
19370 it. If we are pushing other registers onto the stack however, we
19371 can save an instruction in the epilogue by pushing the link register
19372 now and then popping it back into the PC. This incurs extra memory
19373 accesses though, so we only do it when optimizing for size, and only
19374 if we know that we will not need a fancy return sequence. */
19375 if (df_regs_ever_live_p (LR_REGNUM
)
19378 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19379 && !crtl
->tail_call_emit
19380 && !crtl
->calls_eh_return
))
19381 save_reg_mask
|= 1 << LR_REGNUM
;
19383 if (cfun
->machine
->lr_save_eliminated
)
19384 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19386 if (TARGET_REALLY_IWMMXT
19387 && ((bit_count (save_reg_mask
)
19388 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19389 arm_compute_static_chain_stack_bytes())
19392 /* The total number of registers that are going to be pushed
19393 onto the stack is odd. We need to ensure that the stack
19394 is 64-bit aligned before we start to save iWMMXt registers,
19395 and also before we start to create locals. (A local variable
19396 might be a double or long long which we will load/store using
19397 an iWMMXt instruction). Therefore we need to push another
19398 ARM register, so that the stack will be 64-bit aligned. We
19399 try to avoid using the arg registers (r0 -r3) as they might be
19400 used to pass values in a tail call. */
19401 for (reg
= 4; reg
<= 12; reg
++)
19402 if ((save_reg_mask
& (1 << reg
)) == 0)
19406 save_reg_mask
|= (1 << reg
);
19409 cfun
->machine
->sibcall_blocked
= 1;
19410 save_reg_mask
|= (1 << 3);
19414 /* We may need to push an additional register for use initializing the
19415 PIC base register. */
19416 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19417 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19419 reg
= thumb_find_work_register (1 << 4);
19420 if (!call_used_regs
[reg
])
19421 save_reg_mask
|= (1 << reg
);
19424 return save_reg_mask
;
19427 /* Compute a bit mask of which registers need to be
19428 saved on the stack for the current function. */
19429 static unsigned long
19430 thumb1_compute_save_reg_mask (void)
19432 unsigned long mask
;
19436 for (reg
= 0; reg
< 12; reg
++)
19437 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19441 && !TARGET_SINGLE_PIC_BASE
19442 && arm_pic_register
!= INVALID_REGNUM
19443 && crtl
->uses_pic_offset_table
)
19444 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19446 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19447 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19448 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19450 /* LR will also be pushed if any lo regs are pushed. */
19451 if (mask
& 0xff || thumb_force_lr_save ())
19452 mask
|= (1 << LR_REGNUM
);
19454 /* Make sure we have a low work register if we need one.
19455 We will need one if we are going to push a high register,
19456 but we are not currently intending to push a low register. */
19457 if ((mask
& 0xff) == 0
19458 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19460 /* Use thumb_find_work_register to choose which register
19461 we will use. If the register is live then we will
19462 have to push it. Use LAST_LO_REGNUM as our fallback
19463 choice for the register to select. */
19464 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19465 /* Make sure the register returned by thumb_find_work_register is
19466 not part of the return value. */
19467 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19468 reg
= LAST_LO_REGNUM
;
19470 if (callee_saved_reg_p (reg
))
19474 /* The 504 below is 8 bytes less than 512 because there are two possible
19475 alignment words. We can't tell here if they will be present or not so we
19476 have to play it safe and assume that they are. */
19477 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19478 ROUND_UP_WORD (get_frame_size ()) +
19479 crtl
->outgoing_args_size
) >= 504)
19481 /* This is the same as the code in thumb1_expand_prologue() which
19482 determines which register to use for stack decrement. */
19483 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19484 if (mask
& (1 << reg
))
19487 if (reg
> LAST_LO_REGNUM
)
19489 /* Make sure we have a register available for stack decrement. */
19490 mask
|= 1 << LAST_LO_REGNUM
;
19498 /* Return the number of bytes required to save VFP registers. */
19500 arm_get_vfp_saved_size (void)
19502 unsigned int regno
;
19507 /* Space for saved VFP registers. */
19508 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19511 for (regno
= FIRST_VFP_REGNUM
;
19512 regno
< LAST_VFP_REGNUM
;
19515 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19516 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19520 /* Workaround ARM10 VFPr1 bug. */
19521 if (count
== 2 && !arm_arch6
)
19523 saved
+= count
* 8;
19532 if (count
== 2 && !arm_arch6
)
19534 saved
+= count
* 8;
19541 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19542 everything bar the final return instruction. If simple_return is true,
19543 then do not output epilogue, because it has already been emitted in RTL. */
19545 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19546 bool simple_return
)
19548 char conditional
[10];
19551 unsigned long live_regs_mask
;
19552 unsigned long func_type
;
19553 arm_stack_offsets
*offsets
;
19555 func_type
= arm_current_func_type ();
19557 if (IS_NAKED (func_type
))
19560 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19562 /* If this function was declared non-returning, and we have
19563 found a tail call, then we have to trust that the called
19564 function won't return. */
19569 /* Otherwise, trap an attempted return by aborting. */
19571 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19573 assemble_external_libcall (ops
[1]);
19574 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19580 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19582 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19584 cfun
->machine
->return_used_this_function
= 1;
19586 offsets
= arm_get_frame_offsets ();
19587 live_regs_mask
= offsets
->saved_regs_mask
;
19589 if (!simple_return
&& live_regs_mask
)
19591 const char * return_reg
;
19593 /* If we do not have any special requirements for function exit
19594 (e.g. interworking) then we can load the return address
19595 directly into the PC. Otherwise we must load it into LR. */
19597 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19598 return_reg
= reg_names
[PC_REGNUM
];
19600 return_reg
= reg_names
[LR_REGNUM
];
19602 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19604 /* There are three possible reasons for the IP register
19605 being saved. 1) a stack frame was created, in which case
19606 IP contains the old stack pointer, or 2) an ISR routine
19607 corrupted it, or 3) it was saved to align the stack on
19608 iWMMXt. In case 1, restore IP into SP, otherwise just
19610 if (frame_pointer_needed
)
19612 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19613 live_regs_mask
|= (1 << SP_REGNUM
);
19616 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19619 /* On some ARM architectures it is faster to use LDR rather than
19620 LDM to load a single register. On other architectures, the
19621 cost is the same. In 26 bit mode, or for exception handlers,
19622 we have to use LDM to load the PC so that the CPSR is also
19624 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19625 if (live_regs_mask
== (1U << reg
))
19628 if (reg
<= LAST_ARM_REGNUM
19629 && (reg
!= LR_REGNUM
19631 || ! IS_INTERRUPT (func_type
)))
19633 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19634 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19641 /* Generate the load multiple instruction to restore the
19642 registers. Note we can get here, even if
19643 frame_pointer_needed is true, but only if sp already
19644 points to the base of the saved core registers. */
19645 if (live_regs_mask
& (1 << SP_REGNUM
))
19647 unsigned HOST_WIDE_INT stack_adjust
;
19649 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19650 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19652 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19653 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19656 /* If we can't use ldmib (SA110 bug),
19657 then try to pop r3 instead. */
19659 live_regs_mask
|= 1 << 3;
19661 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19664 /* For interrupt returns we have to use an LDM rather than
19665 a POP so that we can use the exception return variant. */
19666 else if (IS_INTERRUPT (func_type
))
19667 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19669 sprintf (instr
, "pop%s\t{", conditional
);
19671 p
= instr
+ strlen (instr
);
19673 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19674 if (live_regs_mask
& (1 << reg
))
19676 int l
= strlen (reg_names
[reg
]);
19682 memcpy (p
, ", ", 2);
19686 memcpy (p
, "%|", 2);
19687 memcpy (p
+ 2, reg_names
[reg
], l
);
19691 if (live_regs_mask
& (1 << LR_REGNUM
))
19693 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19694 /* If returning from an interrupt, restore the CPSR. */
19695 if (IS_INTERRUPT (func_type
))
19702 output_asm_insn (instr
, & operand
);
19704 /* See if we need to generate an extra instruction to
19705 perform the actual function return. */
19707 && func_type
!= ARM_FT_INTERWORKED
19708 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19710 /* The return has already been handled
19711 by loading the LR into the PC. */
19718 switch ((int) ARM_FUNC_TYPE (func_type
))
19722 /* ??? This is wrong for unified assembly syntax. */
19723 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19726 case ARM_FT_INTERWORKED
:
19727 gcc_assert (arm_arch5
|| arm_arch4t
);
19728 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19731 case ARM_FT_EXCEPTION
:
19732 /* ??? This is wrong for unified assembly syntax. */
19733 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19737 /* Use bx if it's available. */
19738 if (arm_arch5
|| arm_arch4t
)
19739 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19741 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19745 output_asm_insn (instr
, & operand
);
19751 /* Write the function name into the code section, directly preceding
19752 the function prologue.
19754 Code will be output similar to this:
19756 .ascii "arm_poke_function_name", 0
19759 .word 0xff000000 + (t1 - t0)
19760 arm_poke_function_name
19762 stmfd sp!, {fp, ip, lr, pc}
19765 When performing a stack backtrace, code can inspect the value
19766 of 'pc' stored at 'fp' + 0. If the trace function then looks
19767 at location pc - 12 and the top 8 bits are set, then we know
19768 that there is a function name embedded immediately preceding this
19769 location and has length ((pc[-3]) & 0xff000000).
19771 We assume that pc is declared as a pointer to an unsigned long.
19773 It is of no benefit to output the function name if we are assembling
19774 a leaf function. These function types will not contain a stack
19775 backtrace structure, therefore it is not possible to determine the
19778 arm_poke_function_name (FILE *stream
, const char *name
)
19780 unsigned long alignlength
;
19781 unsigned long length
;
19784 length
= strlen (name
) + 1;
19785 alignlength
= ROUND_UP_WORD (length
);
19787 ASM_OUTPUT_ASCII (stream
, name
, length
);
19788 ASM_OUTPUT_ALIGN (stream
, 2);
19789 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19790 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19793 /* Place some comments into the assembler stream
19794 describing the current function. */
19796 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19798 unsigned long func_type
;
19800 /* ??? Do we want to print some of the below anyway? */
19804 /* Sanity check. */
19805 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19807 func_type
= arm_current_func_type ();
19809 switch ((int) ARM_FUNC_TYPE (func_type
))
19812 case ARM_FT_NORMAL
:
19814 case ARM_FT_INTERWORKED
:
19815 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19818 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19821 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19823 case ARM_FT_EXCEPTION
:
19824 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19828 if (IS_NAKED (func_type
))
19829 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19831 if (IS_VOLATILE (func_type
))
19832 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19834 if (IS_NESTED (func_type
))
19835 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19836 if (IS_STACKALIGN (func_type
))
19837 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19839 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19841 crtl
->args
.pretend_args_size
, frame_size
);
19843 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19844 frame_pointer_needed
,
19845 cfun
->machine
->uses_anonymous_args
);
19847 if (cfun
->machine
->lr_save_eliminated
)
19848 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19850 if (crtl
->calls_eh_return
)
19851 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19856 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19857 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19859 arm_stack_offsets
*offsets
;
19865 /* Emit any call-via-reg trampolines that are needed for v4t support
19866 of call_reg and call_value_reg type insns. */
19867 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19869 rtx label
= cfun
->machine
->call_via
[regno
];
19873 switch_to_section (function_section (current_function_decl
));
19874 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19875 CODE_LABEL_NUMBER (label
));
19876 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19880 /* ??? Probably not safe to set this here, since it assumes that a
19881 function will be emitted as assembly immediately after we generate
19882 RTL for it. This does not happen for inline functions. */
19883 cfun
->machine
->return_used_this_function
= 0;
19885 else /* TARGET_32BIT */
19887 /* We need to take into account any stack-frame rounding. */
19888 offsets
= arm_get_frame_offsets ();
19890 gcc_assert (!use_return_insn (FALSE
, NULL
)
19891 || (cfun
->machine
->return_used_this_function
!= 0)
19892 || offsets
->saved_regs
== offsets
->outgoing_args
19893 || frame_pointer_needed
);
19897 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19898 STR and STRD. If an even number of registers are being pushed, one
19899 or more STRD patterns are created for each register pair. If an
19900 odd number of registers are pushed, emit an initial STR followed by
19901 as many STRD instructions as are needed. This works best when the
19902 stack is initially 64-bit aligned (the normal case), since it
19903 ensures that each STRD is also 64-bit aligned. */
19905 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19910 rtx par
= NULL_RTX
;
19911 rtx dwarf
= NULL_RTX
;
19915 num_regs
= bit_count (saved_regs_mask
);
19917 /* Must be at least one register to save, and can't save SP or PC. */
19918 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19919 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19920 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19922 /* Create sequence for DWARF info. All the frame-related data for
19923 debugging is held in this wrapper. */
19924 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19926 /* Describe the stack adjustment. */
19927 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19928 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19929 RTX_FRAME_RELATED_P (tmp
) = 1;
19930 XVECEXP (dwarf
, 0, 0) = tmp
;
19932 /* Find the first register. */
19933 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19938 /* If there's an odd number of registers to push. Start off by
19939 pushing a single register. This ensures that subsequent strd
19940 operations are dword aligned (assuming that SP was originally
19941 64-bit aligned). */
19942 if ((num_regs
& 1) != 0)
19944 rtx reg
, mem
, insn
;
19946 reg
= gen_rtx_REG (SImode
, regno
);
19948 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19949 stack_pointer_rtx
));
19951 mem
= gen_frame_mem (Pmode
,
19953 (Pmode
, stack_pointer_rtx
,
19954 plus_constant (Pmode
, stack_pointer_rtx
,
19957 tmp
= gen_rtx_SET (mem
, reg
);
19958 RTX_FRAME_RELATED_P (tmp
) = 1;
19959 insn
= emit_insn (tmp
);
19960 RTX_FRAME_RELATED_P (insn
) = 1;
19961 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19962 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19963 RTX_FRAME_RELATED_P (tmp
) = 1;
19966 XVECEXP (dwarf
, 0, i
) = tmp
;
19970 while (i
< num_regs
)
19971 if (saved_regs_mask
& (1 << regno
))
19973 rtx reg1
, reg2
, mem1
, mem2
;
19974 rtx tmp0
, tmp1
, tmp2
;
19977 /* Find the register to pair with this one. */
19978 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19982 reg1
= gen_rtx_REG (SImode
, regno
);
19983 reg2
= gen_rtx_REG (SImode
, regno2
);
19990 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19993 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19995 -4 * (num_regs
- 1)));
19996 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19997 plus_constant (Pmode
, stack_pointer_rtx
,
19999 tmp1
= gen_rtx_SET (mem1
, reg1
);
20000 tmp2
= gen_rtx_SET (mem2
, reg2
);
20001 RTX_FRAME_RELATED_P (tmp0
) = 1;
20002 RTX_FRAME_RELATED_P (tmp1
) = 1;
20003 RTX_FRAME_RELATED_P (tmp2
) = 1;
20004 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
20005 XVECEXP (par
, 0, 0) = tmp0
;
20006 XVECEXP (par
, 0, 1) = tmp1
;
20007 XVECEXP (par
, 0, 2) = tmp2
;
20008 insn
= emit_insn (par
);
20009 RTX_FRAME_RELATED_P (insn
) = 1;
20010 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20014 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20017 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
20020 tmp1
= gen_rtx_SET (mem1
, reg1
);
20021 tmp2
= gen_rtx_SET (mem2
, reg2
);
20022 RTX_FRAME_RELATED_P (tmp1
) = 1;
20023 RTX_FRAME_RELATED_P (tmp2
) = 1;
20024 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20025 XVECEXP (par
, 0, 0) = tmp1
;
20026 XVECEXP (par
, 0, 1) = tmp2
;
20030 /* Create unwind information. This is an approximation. */
20031 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
20032 plus_constant (Pmode
,
20036 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
20037 plus_constant (Pmode
,
20042 RTX_FRAME_RELATED_P (tmp1
) = 1;
20043 RTX_FRAME_RELATED_P (tmp2
) = 1;
20044 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
20045 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
20047 regno
= regno2
+ 1;
20055 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20056 whenever possible, otherwise it emits single-word stores. The first store
20057 also allocates stack space for all saved registers, using writeback with
20058 post-addressing mode. All other stores use offset addressing. If no STRD
20059 can be emitted, this function emits a sequence of single-word stores,
20060 and not an STM as before, because single-word stores provide more freedom
20061 scheduling and can be turned into an STM by peephole optimizations. */
20063 arm_emit_strd_push (unsigned long saved_regs_mask
)
20066 int i
, j
, dwarf_index
= 0;
20068 rtx dwarf
= NULL_RTX
;
20069 rtx insn
= NULL_RTX
;
20072 /* TODO: A more efficient code can be emitted by changing the
20073 layout, e.g., first push all pairs that can use STRD to keep the
20074 stack aligned, and then push all other registers. */
20075 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20076 if (saved_regs_mask
& (1 << i
))
20079 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20080 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
20081 gcc_assert (num_regs
> 0);
20083 /* Create sequence for DWARF info. */
20084 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20086 /* For dwarf info, we generate explicit stack update. */
20087 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20088 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20089 RTX_FRAME_RELATED_P (tmp
) = 1;
20090 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20092 /* Save registers. */
20093 offset
= - 4 * num_regs
;
20095 while (j
<= LAST_ARM_REGNUM
)
20096 if (saved_regs_mask
& (1 << j
))
20099 && (saved_regs_mask
& (1 << (j
+ 1))))
20101 /* Current register and previous register form register pair for
20102 which STRD can be generated. */
20105 /* Allocate stack space for all saved registers. */
20106 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20107 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20108 mem
= gen_frame_mem (DImode
, tmp
);
20111 else if (offset
> 0)
20112 mem
= gen_frame_mem (DImode
,
20113 plus_constant (Pmode
,
20117 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20119 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
20120 RTX_FRAME_RELATED_P (tmp
) = 1;
20121 tmp
= emit_insn (tmp
);
20123 /* Record the first store insn. */
20124 if (dwarf_index
== 1)
20127 /* Generate dwarf info. */
20128 mem
= gen_frame_mem (SImode
,
20129 plus_constant (Pmode
,
20132 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20133 RTX_FRAME_RELATED_P (tmp
) = 1;
20134 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20136 mem
= gen_frame_mem (SImode
,
20137 plus_constant (Pmode
,
20140 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20141 RTX_FRAME_RELATED_P (tmp
) = 1;
20142 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20149 /* Emit a single word store. */
20152 /* Allocate stack space for all saved registers. */
20153 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20154 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20155 mem
= gen_frame_mem (SImode
, tmp
);
20158 else if (offset
> 0)
20159 mem
= gen_frame_mem (SImode
,
20160 plus_constant (Pmode
,
20164 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20166 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20167 RTX_FRAME_RELATED_P (tmp
) = 1;
20168 tmp
= emit_insn (tmp
);
20170 /* Record the first store insn. */
20171 if (dwarf_index
== 1)
20174 /* Generate dwarf info. */
20175 mem
= gen_frame_mem (SImode
,
20176 plus_constant(Pmode
,
20179 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20180 RTX_FRAME_RELATED_P (tmp
) = 1;
20181 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20190 /* Attach dwarf info to the first insn we generate. */
20191 gcc_assert (insn
!= NULL_RTX
);
20192 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20193 RTX_FRAME_RELATED_P (insn
) = 1;
20196 /* Generate and emit an insn that we will recognize as a push_multi.
20197 Unfortunately, since this insn does not reflect very well the actual
20198 semantics of the operation, we need to annotate the insn for the benefit
20199 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20200 MASK for registers that should be annotated for DWARF2 frame unwind
20203 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20206 int num_dwarf_regs
= 0;
20210 int dwarf_par_index
;
20213 /* We don't record the PC in the dwarf frame information. */
20214 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20216 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20218 if (mask
& (1 << i
))
20220 if (dwarf_regs_mask
& (1 << i
))
20224 gcc_assert (num_regs
&& num_regs
<= 16);
20225 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20227 /* For the body of the insn we are going to generate an UNSPEC in
20228 parallel with several USEs. This allows the insn to be recognized
20229 by the push_multi pattern in the arm.md file.
20231 The body of the insn looks something like this:
20234 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20235 (const_int:SI <num>)))
20236 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20242 For the frame note however, we try to be more explicit and actually
20243 show each register being stored into the stack frame, plus a (single)
20244 decrement of the stack pointer. We do it this way in order to be
20245 friendly to the stack unwinding code, which only wants to see a single
20246 stack decrement per instruction. The RTL we generate for the note looks
20247 something like this:
20250 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20251 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20252 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20253 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20257 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20258 instead we'd have a parallel expression detailing all
20259 the stores to the various memory addresses so that debug
20260 information is more up-to-date. Remember however while writing
20261 this to take care of the constraints with the push instruction.
20263 Note also that this has to be taken care of for the VFP registers.
20265 For more see PR43399. */
20267 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20268 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20269 dwarf_par_index
= 1;
20271 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20273 if (mask
& (1 << i
))
20275 reg
= gen_rtx_REG (SImode
, i
);
20277 XVECEXP (par
, 0, 0)
20278 = gen_rtx_SET (gen_frame_mem
20280 gen_rtx_PRE_MODIFY (Pmode
,
20283 (Pmode
, stack_pointer_rtx
,
20286 gen_rtx_UNSPEC (BLKmode
,
20287 gen_rtvec (1, reg
),
20288 UNSPEC_PUSH_MULT
));
20290 if (dwarf_regs_mask
& (1 << i
))
20292 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20294 RTX_FRAME_RELATED_P (tmp
) = 1;
20295 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20302 for (j
= 1, i
++; j
< num_regs
; i
++)
20304 if (mask
& (1 << i
))
20306 reg
= gen_rtx_REG (SImode
, i
);
20308 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20310 if (dwarf_regs_mask
& (1 << i
))
20313 = gen_rtx_SET (gen_frame_mem
20315 plus_constant (Pmode
, stack_pointer_rtx
,
20318 RTX_FRAME_RELATED_P (tmp
) = 1;
20319 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20326 par
= emit_insn (par
);
20328 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20329 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20330 RTX_FRAME_RELATED_P (tmp
) = 1;
20331 XVECEXP (dwarf
, 0, 0) = tmp
;
20333 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20338 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20339 SIZE is the offset to be adjusted.
20340 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20342 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20346 RTX_FRAME_RELATED_P (insn
) = 1;
20347 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20348 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20351 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20352 SAVED_REGS_MASK shows which registers need to be restored.
20354 Unfortunately, since this insn does not reflect very well the actual
20355 semantics of the operation, we need to annotate the insn for the benefit
20356 of DWARF2 frame unwind information. */
20358 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20363 rtx dwarf
= NULL_RTX
;
20365 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20369 offset_adj
= return_in_pc
? 1 : 0;
20370 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20371 if (saved_regs_mask
& (1 << i
))
20374 gcc_assert (num_regs
&& num_regs
<= 16);
20376 /* If SP is in reglist, then we don't emit SP update insn. */
20377 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20379 /* The parallel needs to hold num_regs SETs
20380 and one SET for the stack update. */
20381 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20384 XVECEXP (par
, 0, 0) = ret_rtx
;
20388 /* Increment the stack pointer, based on there being
20389 num_regs 4-byte registers to restore. */
20390 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20391 plus_constant (Pmode
,
20394 RTX_FRAME_RELATED_P (tmp
) = 1;
20395 XVECEXP (par
, 0, offset_adj
) = tmp
;
20398 /* Now restore every reg, which may include PC. */
20399 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20400 if (saved_regs_mask
& (1 << i
))
20402 reg
= gen_rtx_REG (SImode
, i
);
20403 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20405 /* Emit single load with writeback. */
20406 tmp
= gen_frame_mem (SImode
,
20407 gen_rtx_POST_INC (Pmode
,
20408 stack_pointer_rtx
));
20409 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20410 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20414 tmp
= gen_rtx_SET (reg
,
20417 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20418 RTX_FRAME_RELATED_P (tmp
) = 1;
20419 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20421 /* We need to maintain a sequence for DWARF info too. As dwarf info
20422 should not have PC, skip PC. */
20423 if (i
!= PC_REGNUM
)
20424 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20430 par
= emit_jump_insn (par
);
20432 par
= emit_insn (par
);
20434 REG_NOTES (par
) = dwarf
;
20436 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20437 stack_pointer_rtx
, stack_pointer_rtx
);
20440 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20441 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20443 Unfortunately, since this insn does not reflect very well the actual
20444 semantics of the operation, we need to annotate the insn for the benefit
20445 of DWARF2 frame unwind information. */
20447 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20451 rtx dwarf
= NULL_RTX
;
20454 gcc_assert (num_regs
&& num_regs
<= 32);
20456 /* Workaround ARM10 VFPr1 bug. */
20457 if (num_regs
== 2 && !arm_arch6
)
20459 if (first_reg
== 15)
20465 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20466 there could be up to 32 D-registers to restore.
20467 If there are more than 16 D-registers, make two recursive calls,
20468 each of which emits one pop_multi instruction. */
20471 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20472 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20476 /* The parallel needs to hold num_regs SETs
20477 and one SET for the stack update. */
20478 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20480 /* Increment the stack pointer, based on there being
20481 num_regs 8-byte registers to restore. */
20482 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20483 RTX_FRAME_RELATED_P (tmp
) = 1;
20484 XVECEXP (par
, 0, 0) = tmp
;
20486 /* Now show every reg that will be restored, using a SET for each. */
20487 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20489 reg
= gen_rtx_REG (DFmode
, i
);
20491 tmp
= gen_rtx_SET (reg
,
20494 plus_constant (Pmode
, base_reg
, 8 * j
)));
20495 RTX_FRAME_RELATED_P (tmp
) = 1;
20496 XVECEXP (par
, 0, j
+ 1) = tmp
;
20498 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20503 par
= emit_insn (par
);
20504 REG_NOTES (par
) = dwarf
;
20506 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20507 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
20509 RTX_FRAME_RELATED_P (par
) = 1;
20510 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20513 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20514 base_reg
, base_reg
);
20517 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20518 number of registers are being popped, multiple LDRD patterns are created for
20519 all register pairs. If odd number of registers are popped, last register is
20520 loaded by using LDR pattern. */
20522 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20526 rtx par
= NULL_RTX
;
20527 rtx dwarf
= NULL_RTX
;
20528 rtx tmp
, reg
, tmp1
;
20529 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20531 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20532 if (saved_regs_mask
& (1 << i
))
20535 gcc_assert (num_regs
&& num_regs
<= 16);
20537 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20538 to be popped. So, if num_regs is even, now it will become odd,
20539 and we can generate pop with PC. If num_regs is odd, it will be
20540 even now, and ldr with return can be generated for PC. */
20544 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20546 /* Var j iterates over all the registers to gather all the registers in
20547 saved_regs_mask. Var i gives index of saved registers in stack frame.
20548 A PARALLEL RTX of register-pair is created here, so that pattern for
20549 LDRD can be matched. As PC is always last register to be popped, and
20550 we have already decremented num_regs if PC, we don't have to worry
20551 about PC in this loop. */
20552 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20553 if (saved_regs_mask
& (1 << j
))
20555 /* Create RTX for memory load. */
20556 reg
= gen_rtx_REG (SImode
, j
);
20557 tmp
= gen_rtx_SET (reg
,
20558 gen_frame_mem (SImode
,
20559 plus_constant (Pmode
,
20560 stack_pointer_rtx
, 4 * i
)));
20561 RTX_FRAME_RELATED_P (tmp
) = 1;
20565 /* When saved-register index (i) is even, the RTX to be emitted is
20566 yet to be created. Hence create it first. The LDRD pattern we
20567 are generating is :
20568 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20569 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20570 where target registers need not be consecutive. */
20571 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20575 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20576 added as 0th element and if i is odd, reg_i is added as 1st element
20577 of LDRD pattern shown above. */
20578 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20579 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20583 /* When saved-register index (i) is odd, RTXs for both the registers
20584 to be loaded are generated in above given LDRD pattern, and the
20585 pattern can be emitted now. */
20586 par
= emit_insn (par
);
20587 REG_NOTES (par
) = dwarf
;
20588 RTX_FRAME_RELATED_P (par
) = 1;
20594 /* If the number of registers pushed is odd AND return_in_pc is false OR
20595 number of registers are even AND return_in_pc is true, last register is
20596 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20597 then LDR with post increment. */
20599 /* Increment the stack pointer, based on there being
20600 num_regs 4-byte registers to restore. */
20601 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20602 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20603 RTX_FRAME_RELATED_P (tmp
) = 1;
20604 tmp
= emit_insn (tmp
);
20607 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20608 stack_pointer_rtx
, stack_pointer_rtx
);
20613 if (((num_regs
% 2) == 1 && !return_in_pc
)
20614 || ((num_regs
% 2) == 0 && return_in_pc
))
20616 /* Scan for the single register to be popped. Skip until the saved
20617 register is found. */
20618 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20620 /* Gen LDR with post increment here. */
20621 tmp1
= gen_rtx_MEM (SImode
,
20622 gen_rtx_POST_INC (SImode
,
20623 stack_pointer_rtx
));
20624 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20626 reg
= gen_rtx_REG (SImode
, j
);
20627 tmp
= gen_rtx_SET (reg
, tmp1
);
20628 RTX_FRAME_RELATED_P (tmp
) = 1;
20629 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20633 /* If return_in_pc, j must be PC_REGNUM. */
20634 gcc_assert (j
== PC_REGNUM
);
20635 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20636 XVECEXP (par
, 0, 0) = ret_rtx
;
20637 XVECEXP (par
, 0, 1) = tmp
;
20638 par
= emit_jump_insn (par
);
20642 par
= emit_insn (tmp
);
20643 REG_NOTES (par
) = dwarf
;
20644 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20645 stack_pointer_rtx
, stack_pointer_rtx
);
20649 else if ((num_regs
% 2) == 1 && return_in_pc
)
20651 /* There are 2 registers to be popped. So, generate the pattern
20652 pop_multiple_with_stack_update_and_return to pop in PC. */
20653 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20659 /* LDRD in ARM mode needs consecutive registers as operands. This function
20660 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20661 offset addressing and then generates one separate stack udpate. This provides
20662 more scheduling freedom, compared to writeback on every load. However,
20663 if the function returns using load into PC directly
20664 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20665 before the last load. TODO: Add a peephole optimization to recognize
20666 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20667 peephole optimization to merge the load at stack-offset zero
20668 with the stack update instruction using load with writeback
20669 in post-index addressing mode. */
20671 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20675 rtx par
= NULL_RTX
;
20676 rtx dwarf
= NULL_RTX
;
20679 /* Restore saved registers. */
20680 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20682 while (j
<= LAST_ARM_REGNUM
)
20683 if (saved_regs_mask
& (1 << j
))
20686 && (saved_regs_mask
& (1 << (j
+ 1)))
20687 && (j
+ 1) != PC_REGNUM
)
20689 /* Current register and next register form register pair for which
20690 LDRD can be generated. PC is always the last register popped, and
20691 we handle it separately. */
20693 mem
= gen_frame_mem (DImode
,
20694 plus_constant (Pmode
,
20698 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20700 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20701 tmp
= emit_insn (tmp
);
20702 RTX_FRAME_RELATED_P (tmp
) = 1;
20704 /* Generate dwarf info. */
20706 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20707 gen_rtx_REG (SImode
, j
),
20709 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20710 gen_rtx_REG (SImode
, j
+ 1),
20713 REG_NOTES (tmp
) = dwarf
;
20718 else if (j
!= PC_REGNUM
)
20720 /* Emit a single word load. */
20722 mem
= gen_frame_mem (SImode
,
20723 plus_constant (Pmode
,
20727 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20729 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20730 tmp
= emit_insn (tmp
);
20731 RTX_FRAME_RELATED_P (tmp
) = 1;
20733 /* Generate dwarf info. */
20734 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20735 gen_rtx_REG (SImode
, j
),
20741 else /* j == PC_REGNUM */
20747 /* Update the stack. */
20750 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20751 plus_constant (Pmode
,
20754 tmp
= emit_insn (tmp
);
20755 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20756 stack_pointer_rtx
, stack_pointer_rtx
);
20760 if (saved_regs_mask
& (1 << PC_REGNUM
))
20762 /* Only PC is to be popped. */
20763 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20764 XVECEXP (par
, 0, 0) = ret_rtx
;
20765 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20766 gen_frame_mem (SImode
,
20767 gen_rtx_POST_INC (SImode
,
20768 stack_pointer_rtx
)));
20769 RTX_FRAME_RELATED_P (tmp
) = 1;
20770 XVECEXP (par
, 0, 1) = tmp
;
20771 par
= emit_jump_insn (par
);
20773 /* Generate dwarf info. */
20774 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20775 gen_rtx_REG (SImode
, PC_REGNUM
),
20777 REG_NOTES (par
) = dwarf
;
20778 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20779 stack_pointer_rtx
, stack_pointer_rtx
);
20783 /* Calculate the size of the return value that is passed in registers. */
20785 arm_size_return_regs (void)
20789 if (crtl
->return_rtx
!= 0)
20790 mode
= GET_MODE (crtl
->return_rtx
);
20792 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20794 return GET_MODE_SIZE (mode
);
20797 /* Return true if the current function needs to save/restore LR. */
20799 thumb_force_lr_save (void)
20801 return !cfun
->machine
->lr_save_eliminated
20802 && (!leaf_function_p ()
20803 || thumb_far_jump_used_p ()
20804 || df_regs_ever_live_p (LR_REGNUM
));
20807 /* We do not know if r3 will be available because
20808 we do have an indirect tailcall happening in this
20809 particular case. */
20811 is_indirect_tailcall_p (rtx call
)
20813 rtx pat
= PATTERN (call
);
20815 /* Indirect tail call. */
20816 pat
= XVECEXP (pat
, 0, 0);
20817 if (GET_CODE (pat
) == SET
)
20818 pat
= SET_SRC (pat
);
20820 pat
= XEXP (XEXP (pat
, 0), 0);
20821 return REG_P (pat
);
20824 /* Return true if r3 is used by any of the tail call insns in the
20825 current function. */
20827 any_sibcall_could_use_r3 (void)
20832 if (!crtl
->tail_call_emit
)
20834 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20835 if (e
->flags
& EDGE_SIBCALL
)
20837 rtx call
= BB_END (e
->src
);
20838 if (!CALL_P (call
))
20839 call
= prev_nonnote_nondebug_insn (call
);
20840 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20841 if (find_regno_fusage (call
, USE
, 3)
20842 || is_indirect_tailcall_p (call
))
20849 /* Compute the distance from register FROM to register TO.
20850 These can be the arg pointer (26), the soft frame pointer (25),
20851 the stack pointer (13) or the hard frame pointer (11).
20852 In thumb mode r7 is used as the soft frame pointer, if needed.
20853 Typical stack layout looks like this:
20855 old stack pointer -> | |
20858 | | saved arguments for
20859 | | vararg functions
20862 hard FP & arg pointer -> | | \
20870 soft frame pointer -> | | /
20875 locals base pointer -> | | /
20880 current stack pointer -> | | /
20883 For a given function some or all of these stack components
20884 may not be needed, giving rise to the possibility of
20885 eliminating some of the registers.
20887 The values returned by this function must reflect the behavior
20888 of arm_expand_prologue() and arm_compute_save_reg_mask().
20890 The sign of the number returned reflects the direction of stack
20891 growth, so the values are positive for all eliminations except
20892 from the soft frame pointer to the hard frame pointer.
20894 SFP may point just inside the local variables block to ensure correct
20898 /* Calculate stack offsets. These are used to calculate register elimination
20899 offsets and in prologue/epilogue code. Also calculates which registers
20900 should be saved. */
/* NOTE(review): extraction has dropped many interior lines of this
   function (braces, early return, several assignments); the surviving
   text is preserved verbatim with review comments added only.  */
20902 static arm_stack_offsets
*
20903 arm_get_frame_offsets (void)
20905 struct arm_stack_offsets
*offsets
;
20906 unsigned long func_type
;
20910 HOST_WIDE_INT frame_size
;
/* Results are cached in the per-function machine structure.  */
20913 offsets
= &cfun
->machine
->stack_offsets
;
20915 /* We need to know if we are a leaf function. Unfortunately, it
20916 is possible to be called after start_sequence has been called,
20917 which causes get_insns to return the insns for the sequence,
20918 not the function, which will cause leaf_function_p to return
20919 the incorrect result.
20921 to know about leaf functions once reload has completed, and the
20922 frame size cannot be changed after that time, so we can safely
20923 use the cached value. */
20925 if (reload_completed
)
20928 /* Initially this is the size of the local variables. It will translated
20929 into an offset once we have determined the size of preceding data. */
20930 frame_size
= ROUND_UP_WORD (get_frame_size ());
20932 leaf
= leaf_function_p ();
20934 /* Space for variadic functions. */
20935 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20937 /* In Thumb mode this is incorrect, but never used. */
20939 = (offsets
->saved_args
20940 + arm_compute_static_chain_stack_bytes ()
20941 + (frame_pointer_needed
? 4 : 0));
20945 unsigned int regno
;
/* ARM / Thumb-2 path (the matching else below handles Thumb-1).  */
20947 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20948 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20949 saved
= core_saved
;
20951 /* We know that SP will be doubleword aligned on entry, and we must
20952 preserve that condition at any subroutine call. We also require the
20953 soft frame pointer to be doubleword aligned. */
20955 if (TARGET_REALLY_IWMMXT
)
20957 /* Check for the call-saved iWMMXt registers. */
20958 for (regno
= FIRST_IWMMXT_REGNUM
;
20959 regno
<= LAST_IWMMXT_REGNUM
;
20961 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20965 func_type
= arm_current_func_type ();
20966 /* Space for saved VFP registers. */
20967 if (! IS_VOLATILE (func_type
)
20968 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20969 saved
+= arm_get_vfp_saved_size ();
20971 else /* TARGET_THUMB1 */
20973 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20974 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20975 saved
= core_saved
;
20976 if (TARGET_BACKTRACE
)
20980 /* Saved registers include the stack frame. */
20981 offsets
->saved_regs
20982 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20983 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20985 /* A leaf function does not need any stack alignment if it has nothing
20987 if (leaf
&& frame_size
== 0
20988 /* However if it calls alloca(), we have a dynamically allocated
20989 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20990 && ! cfun
->calls_alloca
)
/* Trivial frame: everything collapses onto the soft frame offset.  */
20992 offsets
->outgoing_args
= offsets
->soft_frame
;
20993 offsets
->locals_base
= offsets
->soft_frame
;
20997 /* Ensure SFP has the correct alignment. */
20998 if (ARM_DOUBLEWORD_ALIGN
20999 && (offsets
->soft_frame
& 7))
21001 offsets
->soft_frame
+= 4;
21002 /* Try to align stack by pushing an extra reg. Don't bother doing this
21003 when there is a stack frame as the alignment will be rolled into
21004 the normal stack adjustment. */
21005 if (frame_size
+ crtl
->outgoing_args_size
== 0)
21009 /* Register r3 is caller-saved. Normally it does not need to be
21010 saved on entry by the prologue. However if we choose to save
21011 it for padding then we may confuse the compiler into thinking
21012 a prologue sequence is required when in fact it is not. This
21013 will occur when shrink-wrapping if r3 is used as a scratch
21014 register and there are no other callee-saved writes.
21016 This situation can be avoided when other callee-saved registers
21017 are available and r3 is not mandatory if we choose a callee-saved
21018 register for padding. */
21019 bool prefer_callee_reg_p
= false;
21021 /* If it is safe to use r3, then do so. This sometimes
21022 generates better code on Thumb-2 by avoiding the need to
21023 use 32-bit push/pop instructions. */
21024 if (! any_sibcall_could_use_r3 ()
21025 && arm_size_return_regs () <= 12
21026 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
21028 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
21031 if (!TARGET_THUMB2
)
21032 prefer_callee_reg_p
= true;
21035 || prefer_callee_reg_p
)
/* Look for a callee-saved register to use as alignment padding.  */
21037 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
21039 /* Avoid fixed registers; they may be changed at
21040 arbitrary times so it's unsafe to restore them
21041 during the epilogue. */
21043 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
/* Account for the extra padding register that was pushed.  */
21053 offsets
->saved_regs
+= 4;
21054 offsets
->saved_regs_mask
|= (1 << reg
);
/* General case: locals sit above the soft frame, outgoing argument
   space above the locals.  */
21059 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
21060 offsets
->outgoing_args
= (offsets
->locals_base
21061 + crtl
->outgoing_args_size
);
21063 if (ARM_DOUBLEWORD_ALIGN
)
21065 /* Ensure SP remains doubleword aligned. */
21066 if (offsets
->outgoing_args
& 7)
21067 offsets
->outgoing_args
+= 4;
21068 gcc_assert (!(offsets
->outgoing_args
& 7));
21075 /* Calculate the relative offsets for the different stack pointers. Positive
21076 offsets are in the direction of stack growth. */
/* NOTE(review): extraction dropped the return type, braces and both
   `switch' headers; the case labels below pair FROM (outer switch) with
   TO (inner switch) per the comment about ELIMINABLE_REGS.  */
21079 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
21081 arm_stack_offsets
*offsets
;
21083 offsets
= arm_get_frame_offsets ();
21085 /* OK, now we have enough information to compute the distances.
21086 There must be an entry in these switch tables for each pair
21087 of registers in ELIMINABLE_REGS, even if some of the entries
21088 seem to be redundant or useless. */
/* FROM == ARG_POINTER_REGNUM: distances measured from the arg pointer.  */
21091 case ARG_POINTER_REGNUM
:
21094 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21097 case FRAME_POINTER_REGNUM
:
21098 /* This is the reverse of the soft frame pointer
21099 to hard frame pointer elimination below. */
21100 return offsets
->soft_frame
- offsets
->saved_args
;
21102 case ARM_HARD_FRAME_POINTER_REGNUM
:
21103 /* This is only non-zero in the case where the static chain register
21104 is stored above the frame. */
21105 return offsets
->frame
- offsets
->saved_args
- 4;
21107 case STACK_POINTER_REGNUM
:
21108 /* If nothing has been pushed on the stack at all
21109 then this will return -4. This *is* correct! */
21110 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
21113 gcc_unreachable ();
21115 gcc_unreachable ();
/* FROM == FRAME_POINTER_REGNUM: distances from the soft frame pointer.  */
21117 case FRAME_POINTER_REGNUM
:
21120 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21123 case ARM_HARD_FRAME_POINTER_REGNUM
:
21124 /* The hard frame pointer points to the top entry in the
21125 stack frame. The soft frame pointer to the bottom entry
21126 in the stack frame. If there is no stack frame at all,
21127 then they are identical. */
21129 return offsets
->frame
- offsets
->soft_frame
;
21131 case STACK_POINTER_REGNUM
:
21132 return offsets
->outgoing_args
- offsets
->soft_frame
;
21135 gcc_unreachable ();
21137 gcc_unreachable ();
21140 /* You cannot eliminate from the stack pointer.
21141 In theory you could eliminate from the hard frame
21142 pointer to the stack pointer, but this will never
21143 happen, since if a stack frame is not needed the
21144 hard frame pointer will never be used. */
21145 gcc_unreachable ();
21149 /* Given FROM and TO register numbers, say whether this elimination is
21150 allowed. Frame pointer elimination is automatically handled.
21152 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21153 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21154 pointer, we must eliminate FRAME_POINTER_REGNUM into
21155 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21156 ARG_POINTER_REGNUM. */
/* NOTE(review): the return type, braces and the final (permissive
   "true") arm of this conditional chain appear to have been dropped by
   extraction; the surviving arms list the forbidden eliminations.  */
21159 arm_can_eliminate (const int from
, const int to
)
21161 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21162 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21163 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21164 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21168 /* Emit RTL to save coprocessor registers on function entry. Returns the
21169 number of bytes pushed. */
/* NOTE(review): extraction dropped the return type, braces, some
   accounting statements and the final return; text kept verbatim.  */
21172 arm_save_coproc_regs(void)
21174 int saved_size
= 0;
21176 unsigned start_reg
;
/* Push each live call-saved iWMMXt register with an SP pre-decrement
   store, marking every store as frame related.  */
21179 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21180 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21182 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21183 insn
= gen_rtx_MEM (V2SImode
, insn
);
21184 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21185 RTX_FRAME_RELATED_P (insn
) = 1;
21189 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
21191 start_reg
= FIRST_VFP_REGNUM
;
/* Scan VFP registers pairwise, emitting one fstmd per contiguous run
   of live call-saved pairs (run length = (reg - start_reg) / 2).  */
21193 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21195 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21196 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21198 if (start_reg
!= reg
)
21199 saved_size
+= vfp_emit_fstmd (start_reg
,
21200 (reg
- start_reg
) / 2);
21201 start_reg
= reg
+ 2;
/* Flush the trailing run, if any.  */
21204 if (start_reg
!= reg
)
21205 saved_size
+= vfp_emit_fstmd (start_reg
,
21206 (reg
- start_reg
) / 2);
21212 /* Set the Thumb frame pointer from the stack pointer. */
/* NOTE(review): extraction dropped the return type, braces and the
   conditionals selecting between the emission variants below; text is
   kept verbatim.  */
21215 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21217 HOST_WIDE_INT amount
;
/* Distance from the locals base up to the current SP.  */
21220 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
/* Variant 1: FP = SP + amount directly.  */
21222 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21223 stack_pointer_rtx
, GEN_INT (amount
)));
/* Variant 2: materialize the constant in FP first, then add SP.  */
21226 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21227 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21228 expects the first two operands to be the same. */
21231 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21233 hard_frame_pointer_rtx
));
21237 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21238 hard_frame_pointer_rtx
,
21239 stack_pointer_rtx
));
/* Describe FP = SP + amount to the unwinder regardless of which
   instruction sequence materialized it.  */
21241 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21242 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21243 RTX_FRAME_RELATED_P (dwarf
) = 1;
21244 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21247 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): the members of struct scratch_reg (at least a `reg'
   field, used below) were dropped by extraction, as were this
   function's braces and several statements; text kept verbatim.  */
21250 struct scratch_reg
{
21255 /* Return a short-lived scratch register for use as a 2nd scratch register on
21256 function entry after the registers are saved in the prologue. This register
21257 must be released by means of release_scratch_register_on_entry. IP is not
21258 considered since it is always used as the 1st scratch register if available.
21260 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21261 mask of live registers. */
21264 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21265 unsigned long live_regs
)
/* Prefer LR when it is saved by the prologue and not the 1st scratch.  */
21271 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
/* Otherwise any saved callee register r4..r10 will do.  */
21277 for (i
= 4; i
< 11; i
++)
21278 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21286 /* If IP is used as the 1st scratch register for a nested function,
21287 then either r3 wasn't available or is used to preserve IP. */
21288 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21290 regno
= (regno1
== 3 ? 2 : 3);
21292 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21297 sr
->reg
= gen_rtx_REG (SImode
, regno
);
/* Spill the chosen register with an SP pre-decrement store and record
   the -4 SP adjustment for the dwarf unwinder.  */
21300 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21301 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21302 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21303 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21304 RTX_FRAME_RELATED_P (insn
) = 1;
21305 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21309 /* Release a scratch register obtained from the preceding function. */
/* NOTE(review): braces and the guarding conditional were dropped by
   extraction; text kept verbatim.  */
21312 release_scratch_register_on_entry (struct scratch_reg
*sr
)
/* Pop the spilled value back (SP post-increment load) and record the
   +4 SP adjustment for the dwarf unwinder — mirror image of the push
   in get_scratch_register_on_entry.  */
21316 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21317 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21318 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21319 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21320 RTX_FRAME_RELATED_P (insn
) = 1;
21321 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
/* Interval between consecutive stack probes.  */
21325 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
/* NOTE(review): the matching #endif was dropped by extraction.  */
21327 #if PROBE_INTERVAL > 4096
21328 #error Cannot use indexed addressing mode for stack probing
21331 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21332 inclusive. These are offsets from the current stack pointer. REGNO1
21333 is the index number of the 1st scratch register and LIVE_REGS is the
21334 mask of live registers. */
/* NOTE(review): braces and a few statements were dropped by extraction;
   the three strategies (single probe, unrolled probes, run-time loop)
   are otherwise intact below and kept verbatim.  */
21337 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21338 unsigned int regno1
, unsigned long live_regs
)
21340 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21342 /* See if we have a constant small number of probes to generate. If so,
21343 that's the easy case. */
21344 if (size
<= PROBE_INTERVAL
)
21346 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21347 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21348 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21351 /* The run-time loop is made up of 10 insns in the generic case while the
21352 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21353 else if (size
<= 5 * PROBE_INTERVAL
)
21355 HOST_WIDE_INT i
, rem
;
21357 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21358 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21359 emit_stack_probe (reg1
);
21361 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21362 it exceeds SIZE. If only two probes are needed, this will not
21363 generate any code. Then probe at FIRST + SIZE. */
21364 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21366 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21367 emit_stack_probe (reg1
);
21370 rem
= size
- (i
- PROBE_INTERVAL
);
/* Residual probe: offsets beyond the immediate-offset range (4095 on
   ARM, 255 on Thumb-2) need an extra pointer adjustment first.  */
21371 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21373 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21374 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21377 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21380 /* Otherwise, do the same as above, but in a loop. Note that we must be
21381 extra careful with variables wrapping around because we might be at
21382 the very top (or the very bottom) of the address space and we have
21383 to be able to handle this case properly; in particular, we use an
21384 equality test for the loop condition. */
21387 HOST_WIDE_INT rounded_size
;
21388 struct scratch_reg sr
;
21390 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21392 emit_move_insn (reg1
, GEN_INT (first
));
21395 /* Step 1: round SIZE to the previous multiple of the interval. */
21397 rounded_size
= size
& -PROBE_INTERVAL
;
21398 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21401 /* Step 2: compute initial and final value of the loop counter. */
21403 /* TEST_ADDR = SP + FIRST. */
21404 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21406 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21407 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21410 /* Step 3: the loop
21414 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21417 while (TEST_ADDR != LAST_ADDR)
21419 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21420 until it is equal to ROUNDED_SIZE. */
21422 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21425 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21426 that SIZE is equal to ROUNDED_SIZE. */
21428 if (size
!= rounded_size
)
21430 HOST_WIDE_INT rem
= size
- rounded_size
;
21432 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21434 emit_set_insn (sr
.reg
,
21435 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21436 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21437 PROBE_INTERVAL
- rem
));
21440 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21443 release_scratch_register_on_entry (&sr
);
21446 /* Make sure nothing is scheduled before we are done. */
21447 emit_insn (gen_blockage ());
21450 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21451 absolute addresses. */
/* NOTE(review): braces, the xops declaration/initialization and the
   return statement were dropped by extraction; text kept verbatim.  */
21454 output_probe_stack_range (rtx reg1
, rtx reg2
)
/* Unique label counter shared across all emitted probe loops.  */
21456 static int labelno
= 0;
21460 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
/* Loop head.  */
21463 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21465 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21467 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21468 output_asm_insn ("sub\t%0, %0, %1", xops
);
21470 /* Probe at TEST_ADDR. */
21471 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21473 /* Test if TEST_ADDR == LAST_ADDR. */
21475 output_asm_insn ("cmp\t%0, %1", xops
);
/* Emit the backward branch by hand so the raw internal label name is
   written directly to the assembly file.  */
21478 fputs ("\tbne\t", asm_out_file
);
21479 assemble_name_raw (asm_out_file
, loop_lab
);
21480 fputc ('\n', asm_out_file
);
21485 /* Generate the prologue instructions for entry into an ARM or Thumb-2
/* NOTE(review): this large function is heavily gutted by extraction —
   braces, several declarations (insn, ip_rtx, clobber_ip, fp_offset,
   amount, leaf, n, frame, r0, r1, addr, dwarf) and many statements are
   missing.  The surviving text is preserved verbatim; only review
   comments have been added.  */
21488 arm_expand_prologue (void)
21493 unsigned long live_regs_mask
;
21494 unsigned long func_type
;
21496 int saved_pretend_args
= 0;
21497 int saved_regs
= 0;
21498 unsigned HOST_WIDE_INT args_to_push
;
21499 HOST_WIDE_INT size
;
21500 arm_stack_offsets
*offsets
;
21503 func_type
= arm_current_func_type ();
21505 /* Naked functions don't have prologues. */
21506 if (IS_NAKED (func_type
))
21508 if (flag_stack_usage_info
)
21509 current_function_static_stack_size
= 0;
21513 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21514 args_to_push
= crtl
->args
.pretend_args_size
;
21516 /* Compute which register we will have to save onto the stack. */
21517 offsets
= arm_get_frame_offsets ();
21518 live_regs_mask
= offsets
->saved_regs_mask
;
21520 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21522 if (IS_STACKALIGN (func_type
))
21526 /* Handle a word-aligned stack pointer. We generate the following:
21531 <save and restore r0 in normal prologue/epilogue>
21535 The unwinder doesn't need to know about the stack realignment.
21536 Just tell it we saved SP in r0. */
21537 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21539 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21540 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21542 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21543 RTX_FRAME_RELATED_P (insn
) = 1;
21544 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
/* Align SP down to a doubleword boundary via r1.  */
21546 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21548 /* ??? The CFA changes here, which may cause GDB to conclude that it
21549 has entered a different function. That said, the unwind info is
21550 correct, individually, before and after this instruction because
21551 we've described the save of SP, which will override the default
21552 handling of SP as restoring from the CFA. */
21553 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21556 /* The static chain register is the same as the IP register. If it is
21557 clobbered when creating the frame, we need to save and restore it. */
21558 clobber_ip
= IS_NESTED (func_type
)
21559 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21560 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21561 && !df_regs_ever_live_p (LR_REGNUM
)
21562 && arm_r3_live_at_start_p ()));
21564 /* Find somewhere to store IP whilst the frame is being created.
21565 We try the following places in order:
21567 1. The last argument register r3 if it is available.
21568 2. A slot on the stack above the frame if there are no
21569 arguments to push onto the stack.
21570 3. Register r3 again, after pushing the argument registers
21571 onto the stack, if this is a varargs function.
21572 4. The last slot on the stack created for the arguments to
21573 push, if this isn't a varargs function.
21575 Note - we only need to tell the dwarf2 backend about the SP
21576 adjustment in the second variant; the static chain register
21577 doesn't need to be unwound, as it doesn't contain a value
21578 inherited from the caller. */
21581 if (!arm_r3_live_at_start_p ())
21582 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21583 else if (args_to_push
== 0)
21587 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
/* Case 2: push IP into the static-chain stack slot above the frame.  */
21590 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21591 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21594 /* Just tell the dwarf backend that we adjusted SP. */
21595 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21596 plus_constant (Pmode
, stack_pointer_rtx
,
21598 RTX_FRAME_RELATED_P (insn
) = 1;
21599 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21603 /* Store the args on the stack. */
21604 if (cfun
->machine
->uses_anonymous_args
)
/* Case 3: varargs — push the pretend-args registers, then reuse r3.  */
21606 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21607 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21608 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21609 saved_pretend_args
= 1;
/* Case 4: store IP into the last argument slot being created.  */
21615 if (args_to_push
== 4)
21616 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21618 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21619 plus_constant (Pmode
,
21623 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21625 /* Just tell the dwarf backend that we adjusted SP. */
21626 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21627 plus_constant (Pmode
, stack_pointer_rtx
,
21629 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21632 RTX_FRAME_RELATED_P (insn
) = 1;
21633 fp_offset
= args_to_push
;
21638 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21640 if (IS_INTERRUPT (func_type
))
21642 /* Interrupt functions must not corrupt any registers.
21643 Creating a frame pointer however, corrupts the IP
21644 register, so we must push it first. */
21645 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21647 /* Do not set RTX_FRAME_RELATED_P on this insn.
21648 The dwarf stack unwinding code only wants to see one
21649 stack decrement per function, and this is not it. If
21650 this instruction is labeled as being part of the frame
21651 creation sequence then dwarf2out_frame_debug_expr will
21652 die when it encounters the assignment of IP to FP
21653 later on, since the use of SP here establishes SP as
21654 the CFA register and not IP.
21656 Anyway this instruction is not really part of the stack
21657 frame creation although it is part of the prologue. */
21660 insn
= emit_set_insn (ip_rtx
,
21661 plus_constant (Pmode
, stack_pointer_rtx
,
21663 RTX_FRAME_RELATED_P (insn
) = 1;
21668 /* Push the argument registers, or reserve space for them. */
21669 if (cfun
->machine
->uses_anonymous_args
)
21670 insn
= emit_multi_reg_push
21671 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21672 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21675 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21676 GEN_INT (- args_to_push
)));
21677 RTX_FRAME_RELATED_P (insn
) = 1;
21680 /* If this is an interrupt service routine, and the link register
21681 is going to be pushed, and we're not generating extra
21682 push of IP (needed when frame is needed and frame layout if apcs),
21683 subtracting four from LR now will mean that the function return
21684 can be done with a single instruction. */
21685 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21686 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21687 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21690 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21692 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
/* Push the live core registers.  */
21695 if (live_regs_mask
)
21697 unsigned long dwarf_regs_mask
= live_regs_mask
;
21699 saved_regs
+= bit_count (live_regs_mask
) * 4;
21700 if (optimize_size
&& !frame_pointer_needed
21701 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21703 /* If no coprocessor registers are being pushed and we don't have
21704 to worry about a frame pointer then push extra registers to
21705 create the stack frame. This is done is a way that does not
21706 alter the frame layout, so is independent of the epilogue. */
21710 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21712 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21713 if (frame
&& n
* 4 >= frame
)
21716 live_regs_mask
|= (1 << n
) - 1;
21717 saved_regs
+= frame
;
/* Prefer strd-based pushes when the tuning asks for ldrd/strd and we
   are not optimizing for size.  */
21722 && current_tune
->prefer_ldrd_strd
21723 && !optimize_function_for_size_p (cfun
))
21725 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21727 thumb2_emit_strd_push (live_regs_mask
);
21728 else if (TARGET_ARM
21729 && !TARGET_APCS_FRAME
21730 && !IS_INTERRUPT (func_type
))
21731 arm_emit_strd_push (live_regs_mask
);
21734 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21735 RTX_FRAME_RELATED_P (insn
) = 1;
21740 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21741 RTX_FRAME_RELATED_P (insn
) = 1;
21745 if (! IS_VOLATILE (func_type
))
21746 saved_regs
+= arm_save_coproc_regs ();
21748 if (frame_pointer_needed
&& TARGET_ARM
)
21750 /* Create the new frame pointer. */
21751 if (TARGET_APCS_FRAME
)
21753 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21754 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21755 RTX_FRAME_RELATED_P (insn
) = 1;
21759 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21760 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21761 stack_pointer_rtx
, insn
));
21762 RTX_FRAME_RELATED_P (insn
) = 1;
21766 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21767 if (flag_stack_usage_info
)
21768 current_function_static_stack_size
= size
;
21770 /* If this isn't an interrupt service routine and we have a frame, then do
21771 stack checking. We use IP as the first scratch register, except for the
21772 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21773 if (!IS_INTERRUPT (func_type
)
21774 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
21776 unsigned int regno
;
21778 if (!IS_NESTED (func_type
) || clobber_ip
)
21780 else if (df_regs_ever_live_p (LR_REGNUM
))
21785 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21787 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
21788 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
21789 size
- STACK_CHECK_PROTECT
,
21790 regno
, live_regs_mask
);
21793 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
21794 regno
, live_regs_mask
);
21797 /* Recover the static chain register. */
21800 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21801 insn
= gen_rtx_REG (SImode
, 3);
/* Otherwise reload IP from its stack slot just above the frame.  */
21804 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21805 insn
= gen_frame_mem (SImode
, insn
);
21807 emit_set_insn (ip_rtx
, insn
);
21808 emit_insn (gen_force_register_use (ip_rtx
));
/* Allocate the remainder of the frame.  */
21811 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21813 /* This add can produce multiple insns for a large constant, so we
21814 need to get tricky. */
21815 rtx_insn
*last
= get_last_insn ();
21817 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21818 - offsets
->outgoing_args
);
21820 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
/* Mark every insn the add expanded to as frame related.  */
21824 last
= last
? NEXT_INSN (last
) : get_insns ();
21825 RTX_FRAME_RELATED_P (last
) = 1;
21827 while (last
!= insn
);
21829 /* If the frame pointer is needed, emit a special barrier that
21830 will prevent the scheduler from moving stores to the frame
21831 before the stack adjustment. */
21832 if (frame_pointer_needed
)
21833 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21834 hard_frame_pointer_rtx
));
21838 if (frame_pointer_needed
&& TARGET_THUMB2
)
21839 thumb_set_frame_pointer (offsets
);
21841 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21843 unsigned long mask
;
21845 mask
= live_regs_mask
;
21846 mask
&= THUMB2_WORK_REGS
;
21847 if (!IS_NESTED (func_type
))
21848 mask
|= (1 << IP_REGNUM
);
21849 arm_load_pic_register (mask
);
21852 /* If we are profiling, make sure no instructions are scheduled before
21853 the call to mcount. Similarly if the user has requested no
21854 scheduling in the prolog. Similarly if we want non-call exceptions
21855 using the EABI unwinder, to prevent faulting instructions from being
21856 swapped with a stack adjustment. */
21857 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21858 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21859 && cfun
->can_throw_non_call_exceptions
))
21860 emit_insn (gen_blockage ());
21862 /* If the link register is being kept alive, with the return address in it,
21863 then make sure that it does not get reused by the ce2 pass. */
21864 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21865 cfun
->machine
->lr_save_eliminated
= 1;
21868 /* Print condition code to STREAM. Helper function for arm_print_operand. */
/* NOTE(review): braces and the TARGET_THUMB2 guards around the
   lossage calls were dropped by extraction; text kept verbatim.  */
21870 arm_print_condition (FILE *stream
)
/* presumably ccfsm states 3/4 mark an active conditional-execution
   sequence — confirm against arm_ccfsm_state's definition.  */
21872 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21874 /* Branch conversion is not implemented for Thumb-2. */
21877 output_operand_lossage ("predicated Thumb instruction");
21880 if (current_insn_predicate
!= NULL
)
21882 output_operand_lossage
21883 ("predicated instruction in conditional sequence");
/* Inside a conditional sequence: print the sequence's condition.  */
21887 fputs (arm_condition_codes
[arm_current_cc
], stream
);
/* Otherwise, print the condition of an explicitly predicated insn.  */
21889 else if (current_insn_predicate
)
21891 enum arm_cond_code code
;
21895 output_operand_lossage ("predicated Thumb instruction");
21899 code
= get_arm_condition_code (current_insn_predicate
);
21900 fputs (arm_condition_codes
[code
], stream
);
21905 /* Globally reserved letters: acln
21906 Puncutation letters currently used: @_|?().!#
21907 Lower case letters currently used: bcdefhimpqtvwxyz
21908 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21909 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21911 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21913 If CODE is 'd', then the X is a condition operand and the instruction
21914 should only be executed if the condition is true.
21915 if CODE is 'D', then the X is a condition operand and the instruction
21916 should only be executed if the condition is false: however, if the mode
21917 of the comparison is CCFPEmode, then always execute the instruction -- we
21918 do this because in these circumstances !GE does not necessarily imply LT;
21919 in these cases the instruction pattern will take care to make sure that
21920 an instruction containing %d will follow, thereby undoing the effects of
21921 doing this instruction unconditionally.
21922 If CODE is 'N' then X is a floating point operand that must be negated
21924 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21925 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21927 arm_print_operand (FILE *stream
, rtx x
, int code
)
21932 fputs (ASM_COMMENT_START
, stream
);
21936 fputs (user_label_prefix
, stream
);
21940 fputs (REGISTER_PREFIX
, stream
);
21944 arm_print_condition (stream
);
21948 /* The current condition code for a condition code setting instruction.
21949 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21950 fputc('s', stream
);
21951 arm_print_condition (stream
);
21955 /* If the instruction is conditionally executed then print
21956 the current condition code, otherwise print 's'. */
21957 gcc_assert (TARGET_THUMB2
);
21958 if (current_insn_predicate
)
21959 arm_print_condition (stream
);
21961 fputc('s', stream
);
21964 /* %# is a "break" sequence. It doesn't output anything, but is used to
21965 separate e.g. operand numbers from following text, if that text consists
21966 of further digits which we don't want to be part of the operand
21974 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
21975 fprintf (stream
, "%s", fp_const_from_val (&r
));
21979 /* An integer or symbol address without a preceding # sign. */
21981 switch (GET_CODE (x
))
21984 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21988 output_addr_const (stream
, x
);
21992 if (GET_CODE (XEXP (x
, 0)) == PLUS
21993 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21995 output_addr_const (stream
, x
);
21998 /* Fall through. */
22001 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22005 /* An integer that we want to print in HEX. */
22007 switch (GET_CODE (x
))
22010 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
22014 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22019 if (CONST_INT_P (x
))
22022 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
22023 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
22027 putc ('~', stream
);
22028 output_addr_const (stream
, x
);
22033 /* Print the log2 of a CONST_INT. */
22037 if (!CONST_INT_P (x
)
22038 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
22039 output_operand_lossage ("Unsupported operand for code '%c'", code
);
22041 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22046 /* The low 16 bits of an immediate constant. */
22047 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
22051 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
22055 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
22063 shift
= shift_op (x
, &val
);
22067 fprintf (stream
, ", %s ", shift
);
22069 arm_print_operand (stream
, XEXP (x
, 1), 0);
22071 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22076 /* An explanation of the 'Q', 'R' and 'H' register operands:
22078 In a pair of registers containing a DI or DF value the 'Q'
22079 operand returns the register number of the register containing
22080 the least significant part of the value. The 'R' operand returns
22081 the register number of the register containing the most
22082 significant part of the value.
22084 The 'H' operand returns the higher of the two register numbers.
22085 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22086 same as the 'Q' operand, since the most significant part of the
22087 value is held in the lower number register. The reverse is true
22088 on systems where WORDS_BIG_ENDIAN is false.
22090 The purpose of these operands is to distinguish between cases
22091 where the endian-ness of the values is important (for example
22092 when they are added together), and cases where the endian-ness
22093 is irrelevant, but the order of register operations is important.
22094 For example when loading a value from memory into a register
22095 pair, the endian-ness does not matter. Provided that the value
22096 from the lower memory address is put into the lower numbered
22097 register, and the value from the higher address is put into the
22098 higher numbered register, the load will work regardless of whether
22099 the value being loaded is big-wordian or little-wordian. The
22100 order of the two register loads can matter however, if the address
22101 of the memory location is actually held in one of the registers
22102 being overwritten by the load.
22104 The 'Q' and 'R' constraints are also available for 64-bit
22107 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22109 rtx part
= gen_lowpart (SImode
, x
);
22110 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22114 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22116 output_operand_lossage ("invalid operand for code '%c'", code
);
22120 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
22124 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22126 machine_mode mode
= GET_MODE (x
);
22129 if (mode
== VOIDmode
)
22131 part
= gen_highpart_mode (SImode
, mode
, x
);
22132 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22136 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22138 output_operand_lossage ("invalid operand for code '%c'", code
);
22142 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
22146 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22148 output_operand_lossage ("invalid operand for code '%c'", code
);
22152 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22156 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22158 output_operand_lossage ("invalid operand for code '%c'", code
);
22162 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22166 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22168 output_operand_lossage ("invalid operand for code '%c'", code
);
22172 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
22176 asm_fprintf (stream
, "%r",
22177 REG_P (XEXP (x
, 0))
22178 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
22182 asm_fprintf (stream
, "{%r-%r}",
22184 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22187 /* Like 'M', but writing doubleword vector registers, for use by Neon
22191 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22192 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22194 asm_fprintf (stream
, "{d%d}", regno
);
22196 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
22201 /* CONST_TRUE_RTX means always -- that's the default. */
22202 if (x
== const_true_rtx
)
22205 if (!COMPARISON_P (x
))
22207 output_operand_lossage ("invalid operand for code '%c'", code
);
22211 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22216 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22217 want to do that. */
22218 if (x
== const_true_rtx
)
22220 output_operand_lossage ("instruction never executed");
22223 if (!COMPARISON_P (x
))
22225 output_operand_lossage ("invalid operand for code '%c'", code
);
22229 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22230 (get_arm_condition_code (x
))],
22240 /* Former Maverick support, removed after GCC-4.7. */
22241 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22246 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22247 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22248 /* Bad value for wCG register number. */
22250 output_operand_lossage ("invalid operand for code '%c'", code
);
22255 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22258 /* Print an iWMMXt control register name. */
22260 if (!CONST_INT_P (x
)
22262 || INTVAL (x
) >= 16)
22263 /* Bad value for wC register number. */
22265 output_operand_lossage ("invalid operand for code '%c'", code
);
22271 static const char * wc_reg_names
[16] =
22273 "wCID", "wCon", "wCSSF", "wCASF",
22274 "wC4", "wC5", "wC6", "wC7",
22275 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22276 "wC12", "wC13", "wC14", "wC15"
22279 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22283 /* Print the high single-precision register of a VFP double-precision
22287 machine_mode mode
= GET_MODE (x
);
22290 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22292 output_operand_lossage ("invalid operand for code '%c'", code
);
22297 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22299 output_operand_lossage ("invalid operand for code '%c'", code
);
22303 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22307 /* Print a VFP/Neon double precision or quad precision register name. */
22311 machine_mode mode
= GET_MODE (x
);
22312 int is_quad
= (code
== 'q');
22315 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22317 output_operand_lossage ("invalid operand for code '%c'", code
);
22322 || !IS_VFP_REGNUM (REGNO (x
)))
22324 output_operand_lossage ("invalid operand for code '%c'", code
);
22329 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22330 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22332 output_operand_lossage ("invalid operand for code '%c'", code
);
22336 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22337 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22341 /* These two codes print the low/high doubleword register of a Neon quad
22342 register, respectively. For pair-structure types, can also print
22343 low/high quadword registers. */
22347 machine_mode mode
= GET_MODE (x
);
22350 if ((GET_MODE_SIZE (mode
) != 16
22351 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22353 output_operand_lossage ("invalid operand for code '%c'", code
);
22358 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22360 output_operand_lossage ("invalid operand for code '%c'", code
);
22364 if (GET_MODE_SIZE (mode
) == 16)
22365 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22366 + (code
== 'f' ? 1 : 0));
22368 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22369 + (code
== 'f' ? 1 : 0));
22373 /* Print a VFPv3 floating-point constant, represented as an integer
22377 int index
= vfp3_const_double_index (x
);
22378 gcc_assert (index
!= -1);
22379 fprintf (stream
, "%d", index
);
22383 /* Print bits representing opcode features for Neon.
22385 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22386 and polynomials as unsigned.
22388 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22390 Bit 2 is 1 for rounding functions, 0 otherwise. */
22392 /* Identify the type as 's', 'u', 'p' or 'f'. */
22395 HOST_WIDE_INT bits
= INTVAL (x
);
22396 fputc ("uspf"[bits
& 3], stream
);
22400 /* Likewise, but signed and unsigned integers are both 'i'. */
22403 HOST_WIDE_INT bits
= INTVAL (x
);
22404 fputc ("iipf"[bits
& 3], stream
);
22408 /* As for 'T', but emit 'u' instead of 'p'. */
22411 HOST_WIDE_INT bits
= INTVAL (x
);
22412 fputc ("usuf"[bits
& 3], stream
);
22416 /* Bit 2: rounding (vs none). */
22419 HOST_WIDE_INT bits
= INTVAL (x
);
22420 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22424 /* Memory operand for vld1/vst1 instruction. */
22428 bool postinc
= FALSE
;
22429 rtx postinc_reg
= NULL
;
22430 unsigned align
, memsize
, align_bits
;
22432 gcc_assert (MEM_P (x
));
22433 addr
= XEXP (x
, 0);
22434 if (GET_CODE (addr
) == POST_INC
)
22437 addr
= XEXP (addr
, 0);
22439 if (GET_CODE (addr
) == POST_MODIFY
)
22441 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22442 addr
= XEXP (addr
, 0);
22444 asm_fprintf (stream
, "[%r", REGNO (addr
));
22446 /* We know the alignment of this access, so we can emit a hint in the
22447 instruction (for some alignments) as an aid to the memory subsystem
22449 align
= MEM_ALIGN (x
) >> 3;
22450 memsize
= MEM_SIZE (x
);
22452 /* Only certain alignment specifiers are supported by the hardware. */
22453 if (memsize
== 32 && (align
% 32) == 0)
22455 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22457 else if (memsize
>= 8 && (align
% 8) == 0)
22462 if (align_bits
!= 0)
22463 asm_fprintf (stream
, ":%d", align_bits
);
22465 asm_fprintf (stream
, "]");
22468 fputs("!", stream
);
22470 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22478 gcc_assert (MEM_P (x
));
22479 addr
= XEXP (x
, 0);
22480 gcc_assert (REG_P (addr
));
22481 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22485 /* Translate an S register number into a D register number and element index. */
22488 machine_mode mode
= GET_MODE (x
);
22491 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22493 output_operand_lossage ("invalid operand for code '%c'", code
);
22498 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22500 output_operand_lossage ("invalid operand for code '%c'", code
);
22504 regno
= regno
- FIRST_VFP_REGNUM
;
22505 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22510 gcc_assert (CONST_DOUBLE_P (x
));
22512 result
= vfp3_const_double_for_fract_bits (x
);
22514 result
= vfp3_const_double_for_bits (x
);
22515 fprintf (stream
, "#%d", result
);
22518 /* Register specifier for vld1.16/vst1.16. Translate the S register
22519 number into a D register number and element index. */
22522 machine_mode mode
= GET_MODE (x
);
22525 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22527 output_operand_lossage ("invalid operand for code '%c'", code
);
22532 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22534 output_operand_lossage ("invalid operand for code '%c'", code
);
22538 regno
= regno
- FIRST_VFP_REGNUM
;
22539 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22546 output_operand_lossage ("missing operand");
22550 switch (GET_CODE (x
))
22553 asm_fprintf (stream
, "%r", REGNO (x
));
22557 output_address (GET_MODE (x
), XEXP (x
, 0));
22563 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22564 sizeof (fpstr
), 0, 1);
22565 fprintf (stream
, "#%s", fpstr
);
22570 gcc_assert (GET_CODE (x
) != NEG
);
22571 fputc ('#', stream
);
22572 if (GET_CODE (x
) == HIGH
)
22574 fputs (":lower16:", stream
);
22578 output_addr_const (stream
, x
);
22584 /* Target hook for printing a memory address. */
22586 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22590 int is_minus
= GET_CODE (x
) == MINUS
;
22593 asm_fprintf (stream
, "[%r]", REGNO (x
));
22594 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22596 rtx base
= XEXP (x
, 0);
22597 rtx index
= XEXP (x
, 1);
22598 HOST_WIDE_INT offset
= 0;
22600 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22602 /* Ensure that BASE is a register. */
22603 /* (one of them must be). */
22604 /* Also ensure the SP is not used as in index register. */
22605 std::swap (base
, index
);
22607 switch (GET_CODE (index
))
22610 offset
= INTVAL (index
);
22613 asm_fprintf (stream
, "[%r, #%wd]",
22614 REGNO (base
), offset
);
22618 asm_fprintf (stream
, "[%r, %s%r]",
22619 REGNO (base
), is_minus
? "-" : "",
22629 asm_fprintf (stream
, "[%r, %s%r",
22630 REGNO (base
), is_minus
? "-" : "",
22631 REGNO (XEXP (index
, 0)));
22632 arm_print_operand (stream
, index
, 'S');
22633 fputs ("]", stream
);
22638 gcc_unreachable ();
22641 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22642 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22644 gcc_assert (REG_P (XEXP (x
, 0)));
22646 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22647 asm_fprintf (stream
, "[%r, #%s%d]!",
22648 REGNO (XEXP (x
, 0)),
22649 GET_CODE (x
) == PRE_DEC
? "-" : "",
22650 GET_MODE_SIZE (mode
));
22652 asm_fprintf (stream
, "[%r], #%s%d",
22653 REGNO (XEXP (x
, 0)),
22654 GET_CODE (x
) == POST_DEC
? "-" : "",
22655 GET_MODE_SIZE (mode
));
22657 else if (GET_CODE (x
) == PRE_MODIFY
)
22659 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22660 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22661 asm_fprintf (stream
, "#%wd]!",
22662 INTVAL (XEXP (XEXP (x
, 1), 1)));
22664 asm_fprintf (stream
, "%r]!",
22665 REGNO (XEXP (XEXP (x
, 1), 1)));
22667 else if (GET_CODE (x
) == POST_MODIFY
)
22669 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22670 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22671 asm_fprintf (stream
, "#%wd",
22672 INTVAL (XEXP (XEXP (x
, 1), 1)));
22674 asm_fprintf (stream
, "%r",
22675 REGNO (XEXP (XEXP (x
, 1), 1)));
22677 else output_addr_const (stream
, x
);
22682 asm_fprintf (stream
, "[%r]", REGNO (x
));
22683 else if (GET_CODE (x
) == POST_INC
)
22684 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22685 else if (GET_CODE (x
) == PLUS
)
22687 gcc_assert (REG_P (XEXP (x
, 0)));
22688 if (CONST_INT_P (XEXP (x
, 1)))
22689 asm_fprintf (stream
, "[%r, #%wd]",
22690 REGNO (XEXP (x
, 0)),
22691 INTVAL (XEXP (x
, 1)));
22693 asm_fprintf (stream
, "[%r, %r]",
22694 REGNO (XEXP (x
, 0)),
22695 REGNO (XEXP (x
, 1)));
22698 output_addr_const (stream
, x
);
22702 /* Target hook for indicating whether a punctuation character for
22703 TARGET_PRINT_OPERAND is valid. */
22705 arm_print_operand_punct_valid_p (unsigned char code
)
22707 return (code
== '@' || code
== '|' || code
== '.'
22708 || code
== '(' || code
== ')' || code
== '#'
22709 || (TARGET_32BIT
&& (code
== '?'))
22710 || (TARGET_THUMB2
&& (code
== '!'))
22711 || (TARGET_THUMB
&& (code
== '_')));
22714 /* Target hook for assembling integer objects. The ARM version needs to
22715 handle word-sized values specially. */
22717 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22721 if (size
== UNITS_PER_WORD
&& aligned_p
)
22723 fputs ("\t.word\t", asm_out_file
);
22724 output_addr_const (asm_out_file
, x
);
22726 /* Mark symbols as position independent. We only do this in the
22727 .text segment, not in the .data segment. */
22728 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22729 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22731 /* See legitimize_pic_address for an explanation of the
22732 TARGET_VXWORKS_RTP check. */
22733 if (!arm_pic_data_is_text_relative
22734 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22735 fputs ("(GOT)", asm_out_file
);
22737 fputs ("(GOTOFF)", asm_out_file
);
22739 fputc ('\n', asm_out_file
);
22743 mode
= GET_MODE (x
);
22745 if (arm_vector_mode_supported_p (mode
))
22749 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22751 units
= CONST_VECTOR_NUNITS (x
);
22752 size
= GET_MODE_UNIT_SIZE (mode
);
22754 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22755 for (i
= 0; i
< units
; i
++)
22757 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22759 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22762 for (i
= 0; i
< units
; i
++)
22764 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22766 (*CONST_DOUBLE_REAL_VALUE (elt
), GET_MODE_INNER (mode
),
22767 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22773 return default_assemble_integer (x
, size
, aligned_p
);
/* Emit SYMBOL as a static constructor (IS_CTOR true) or destructor
   (IS_CTOR false) entry with the given PRIORITY.  Non-AAPCS targets
   defer to the generic named-section emitters; AAPCS targets write a
   word into .init_array/.fini_array with a "(target1)" relocation.  */
22777 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
/* Non-AAPCS: fall back to the default .ctors/.dtors style output.  */
22781 if (!TARGET_AAPCS_BASED
)
22784 default_named_section_asm_out_constructor
22785 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22789 /* Put these in the .init_array section, using a special relocation. */
/* A non-default priority gets its own suffixed section name
   (e.g. ".init_array.00042") so the linker can order the entries.  */
22790 if (priority
!= DEFAULT_INIT_PRIORITY
)
22793 sprintf (buf
, "%s.%.5u",
22794 is_ctor
? ".init_array" : ".fini_array",
22796 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
/* Emit a pointer-aligned word holding SYMBOL.  The "(target1)" suffix
   requests the R_ARM_TARGET1 relocation used for .init_array entries
   on ARM ELF targets.  */
22803 switch_to_section (s
);
22804 assemble_align (POINTER_SIZE
);
22805 fputs ("\t.word\t", asm_out_file
);
22806 output_addr_const (asm_out_file
, symbol
);
22807 fputs ("(target1)\n", asm_out_file
);
22810 /* Add a function to the list of static constructors. */
22813 arm_elf_asm_constructor (rtx symbol
, int priority
)
22815 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22818 /* Add a function to the list of static destructors. */
22821 arm_elf_asm_destructor (rtx symbol
, int priority
)
22823 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22826 /* A finite state machine takes care of noticing whether or not instructions
22827 can be conditionally executed, and thus decrease execution time and code
22828 size by deleting branch instructions. The fsm is controlled by
22829 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22831 /* The state of the fsm controlling condition codes are:
22832 0: normal, do nothing special
22833 1: make ASM_OUTPUT_OPCODE not output this instruction
22834 2: make ASM_OUTPUT_OPCODE not output this instruction
22835 3: make instructions conditional
22836 4: make instructions conditional
22838 State transitions (state->state by whom under condition):
22839 0 -> 1 final_prescan_insn if the `target' is a label
22840 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22841 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22842 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22843 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22844 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22845 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22846 (the target insn is arm_target_insn).
22848 If the jump clobbers the conditions then we use states 2 and 4.
22850 A similar thing can be done with conditional return insns.
22852 XXX In case the `target' is an unconditional branch, this conditionalising
22853 of the instructions always reduces code size, but not always execution
22854 time. But then, I want to reduce the code size to somewhere near what
22855 /bin/cc produces. */
22857 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22858 instructions. When a COND_EXEC instruction is seen the subsequent
22859 instructions are scanned so that multiple conditional instructions can be
22860 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22861 specify the length and true/false mask for the IT block. These will be
22862 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22864 /* Returns the index of the ARM condition code string in
22865 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22866 COMPARISON should be an rtx like `(eq (...) (...))'. */
22869 maybe_get_arm_condition_code (rtx comparison
)
22871 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22872 enum arm_cond_code code
;
22873 enum rtx_code comp_code
= GET_CODE (comparison
);
22875 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22876 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22877 XEXP (comparison
, 1));
22881 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22882 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22883 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22884 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22885 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22886 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22887 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22888 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22889 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22890 case CC_DLTUmode
: code
= ARM_CC
;
22893 if (comp_code
== EQ
)
22894 return ARM_INVERSE_CONDITION_CODE (code
);
22895 if (comp_code
== NE
)
22902 case NE
: return ARM_NE
;
22903 case EQ
: return ARM_EQ
;
22904 case GE
: return ARM_PL
;
22905 case LT
: return ARM_MI
;
22906 default: return ARM_NV
;
22912 case NE
: return ARM_NE
;
22913 case EQ
: return ARM_EQ
;
22914 default: return ARM_NV
;
22920 case NE
: return ARM_MI
;
22921 case EQ
: return ARM_PL
;
22922 default: return ARM_NV
;
22927 /* We can handle all cases except UNEQ and LTGT. */
22930 case GE
: return ARM_GE
;
22931 case GT
: return ARM_GT
;
22932 case LE
: return ARM_LS
;
22933 case LT
: return ARM_MI
;
22934 case NE
: return ARM_NE
;
22935 case EQ
: return ARM_EQ
;
22936 case ORDERED
: return ARM_VC
;
22937 case UNORDERED
: return ARM_VS
;
22938 case UNLT
: return ARM_LT
;
22939 case UNLE
: return ARM_LE
;
22940 case UNGT
: return ARM_HI
;
22941 case UNGE
: return ARM_PL
;
22942 /* UNEQ and LTGT do not have a representation. */
22943 case UNEQ
: /* Fall through. */
22944 case LTGT
: /* Fall through. */
22945 default: return ARM_NV
;
22951 case NE
: return ARM_NE
;
22952 case EQ
: return ARM_EQ
;
22953 case GE
: return ARM_LE
;
22954 case GT
: return ARM_LT
;
22955 case LE
: return ARM_GE
;
22956 case LT
: return ARM_GT
;
22957 case GEU
: return ARM_LS
;
22958 case GTU
: return ARM_CC
;
22959 case LEU
: return ARM_CS
;
22960 case LTU
: return ARM_HI
;
22961 default: return ARM_NV
;
22967 case LTU
: return ARM_CS
;
22968 case GEU
: return ARM_CC
;
22969 default: return ARM_NV
;
22975 case NE
: return ARM_NE
;
22976 case EQ
: return ARM_EQ
;
22977 case GEU
: return ARM_CS
;
22978 case GTU
: return ARM_HI
;
22979 case LEU
: return ARM_LS
;
22980 case LTU
: return ARM_CC
;
22981 default: return ARM_NV
;
22987 case GE
: return ARM_GE
;
22988 case LT
: return ARM_LT
;
22989 case GEU
: return ARM_CS
;
22990 case LTU
: return ARM_CC
;
22991 default: return ARM_NV
;
22997 case NE
: return ARM_NE
;
22998 case EQ
: return ARM_EQ
;
22999 case GE
: return ARM_GE
;
23000 case GT
: return ARM_GT
;
23001 case LE
: return ARM_LE
;
23002 case LT
: return ARM_LT
;
23003 case GEU
: return ARM_CS
;
23004 case GTU
: return ARM_HI
;
23005 case LEU
: return ARM_LS
;
23006 case LTU
: return ARM_CC
;
23007 default: return ARM_NV
;
23010 default: gcc_unreachable ();
23014 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23015 static enum arm_cond_code
23016 get_arm_condition_code (rtx comparison
)
23018 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
23019 gcc_assert (code
!= ARM_NV
);
23023 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23026 thumb2_final_prescan_insn (rtx_insn
*insn
)
23028 rtx_insn
*first_insn
= insn
;
23029 rtx body
= PATTERN (insn
);
23031 enum arm_cond_code code
;
23036 /* max_insns_skipped in the tune was already taken into account in the
23037 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23038 just emit the IT blocks as we can. It does not make sense to split
23040 max
= MAX_INSN_PER_IT_BLOCK
;
23042 /* Remove the previous insn from the count of insns to be output. */
23043 if (arm_condexec_count
)
23044 arm_condexec_count
--;
23046 /* Nothing to do if we are already inside a conditional block. */
23047 if (arm_condexec_count
)
23050 if (GET_CODE (body
) != COND_EXEC
)
23053 /* Conditional jumps are implemented directly. */
23057 predicate
= COND_EXEC_TEST (body
);
23058 arm_current_cc
= get_arm_condition_code (predicate
);
23060 n
= get_attr_ce_count (insn
);
23061 arm_condexec_count
= 1;
23062 arm_condexec_mask
= (1 << n
) - 1;
23063 arm_condexec_masklen
= n
;
23064 /* See if subsequent instructions can be combined into the same block. */
23067 insn
= next_nonnote_insn (insn
);
23069 /* Jumping into the middle of an IT block is illegal, so a label or
23070 barrier terminates the block. */
23071 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
23074 body
= PATTERN (insn
);
23075 /* USE and CLOBBER aren't really insns, so just skip them. */
23076 if (GET_CODE (body
) == USE
23077 || GET_CODE (body
) == CLOBBER
)
23080 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23081 if (GET_CODE (body
) != COND_EXEC
)
23083 /* Maximum number of conditionally executed instructions in a block. */
23084 n
= get_attr_ce_count (insn
);
23085 if (arm_condexec_masklen
+ n
> max
)
23088 predicate
= COND_EXEC_TEST (body
);
23089 code
= get_arm_condition_code (predicate
);
23090 mask
= (1 << n
) - 1;
23091 if (arm_current_cc
== code
)
23092 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
23093 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
23096 arm_condexec_count
++;
23097 arm_condexec_masklen
+= n
;
23099 /* A jump must be the last instruction in a conditional block. */
23103 /* Restore recog_data (getting the attributes of other insns can
23104 destroy this array, but final.c assumes that it remains intact
23105 across this call). */
23106 extract_constrain_insn_cached (first_insn
);
23110 arm_final_prescan_insn (rtx_insn
*insn
)
23112 /* BODY will hold the body of INSN. */
23113 rtx body
= PATTERN (insn
);
23115 /* This will be 1 if trying to repeat the trick, and things need to be
23116 reversed if it appears to fail. */
23119 /* If we start with a return insn, we only succeed if we find another one. */
23120 int seeking_return
= 0;
23121 enum rtx_code return_code
= UNKNOWN
;
23123 /* START_INSN will hold the insn from where we start looking. This is the
23124 first insn after the following code_label if REVERSE is true. */
23125 rtx_insn
*start_insn
= insn
;
23127 /* If in state 4, check if the target branch is reached, in order to
23128 change back to state 0. */
23129 if (arm_ccfsm_state
== 4)
23131 if (insn
== arm_target_insn
)
23133 arm_target_insn
= NULL
;
23134 arm_ccfsm_state
= 0;
23139 /* If in state 3, it is possible to repeat the trick, if this insn is an
23140 unconditional branch to a label, and immediately following this branch
23141 is the previous target label which is only used once, and the label this
23142 branch jumps to is not too far off. */
23143 if (arm_ccfsm_state
== 3)
23145 if (simplejump_p (insn
))
23147 start_insn
= next_nonnote_insn (start_insn
);
23148 if (BARRIER_P (start_insn
))
23150 /* XXX Isn't this always a barrier? */
23151 start_insn
= next_nonnote_insn (start_insn
);
23153 if (LABEL_P (start_insn
)
23154 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23155 && LABEL_NUSES (start_insn
) == 1)
23160 else if (ANY_RETURN_P (body
))
23162 start_insn
= next_nonnote_insn (start_insn
);
23163 if (BARRIER_P (start_insn
))
23164 start_insn
= next_nonnote_insn (start_insn
);
23165 if (LABEL_P (start_insn
)
23166 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23167 && LABEL_NUSES (start_insn
) == 1)
23170 seeking_return
= 1;
23171 return_code
= GET_CODE (body
);
23180 gcc_assert (!arm_ccfsm_state
|| reverse
);
23181 if (!JUMP_P (insn
))
23184 /* This jump might be paralleled with a clobber of the condition codes
23185 the jump should always come first */
23186 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23187 body
= XVECEXP (body
, 0, 0);
23190 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23191 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23194 int fail
= FALSE
, succeed
= FALSE
;
23195 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23196 int then_not_else
= TRUE
;
23197 rtx_insn
*this_insn
= start_insn
;
23200 /* Register the insn jumped to. */
23203 if (!seeking_return
)
23204 label
= XEXP (SET_SRC (body
), 0);
23206 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23207 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23208 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23210 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23211 then_not_else
= FALSE
;
23213 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23215 seeking_return
= 1;
23216 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23218 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23220 seeking_return
= 1;
23221 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23222 then_not_else
= FALSE
;
23225 gcc_unreachable ();
23227 /* See how many insns this branch skips, and what kind of insns. If all
23228 insns are okay, and the label or unconditional branch to the same
23229 label is not too far away, succeed. */
23230 for (insns_skipped
= 0;
23231 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23235 this_insn
= next_nonnote_insn (this_insn
);
23239 switch (GET_CODE (this_insn
))
23242 /* Succeed if it is the target label, otherwise fail since
23243 control falls in from somewhere else. */
23244 if (this_insn
== label
)
23246 arm_ccfsm_state
= 1;
23254 /* Succeed if the following insn is the target label.
23256 If return insns are used then the last insn in a function
23257 will be a barrier. */
23258 this_insn
= next_nonnote_insn (this_insn
);
23259 if (this_insn
&& this_insn
== label
)
23261 arm_ccfsm_state
= 1;
23269 /* The AAPCS says that conditional calls should not be
23270 used since they make interworking inefficient (the
23271 linker can't transform BL<cond> into BLX). That's
23272 only a problem if the machine has BLX. */
23279 /* Succeed if the following insn is the target label, or
23280 if the following two insns are a barrier and the
23282 this_insn
= next_nonnote_insn (this_insn
);
23283 if (this_insn
&& BARRIER_P (this_insn
))
23284 this_insn
= next_nonnote_insn (this_insn
);
23286 if (this_insn
&& this_insn
== label
23287 && insns_skipped
< max_insns_skipped
)
23289 arm_ccfsm_state
= 1;
23297 /* If this is an unconditional branch to the same label, succeed.
23298 If it is to another label, do nothing. If it is conditional,
23300 /* XXX Probably, the tests for SET and the PC are
23303 scanbody
= PATTERN (this_insn
);
23304 if (GET_CODE (scanbody
) == SET
23305 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23307 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23308 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23310 arm_ccfsm_state
= 2;
23313 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23316 /* Fail if a conditional return is undesirable (e.g. on a
23317 StrongARM), but still allow this if optimizing for size. */
23318 else if (GET_CODE (scanbody
) == return_code
23319 && !use_return_insn (TRUE
, NULL
)
23322 else if (GET_CODE (scanbody
) == return_code
)
23324 arm_ccfsm_state
= 2;
23327 else if (GET_CODE (scanbody
) == PARALLEL
)
23329 switch (get_attr_conds (this_insn
))
23339 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23344 /* Instructions using or affecting the condition codes make it
23346 scanbody
= PATTERN (this_insn
);
23347 if (!(GET_CODE (scanbody
) == SET
23348 || GET_CODE (scanbody
) == PARALLEL
)
23349 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23359 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23360 arm_target_label
= CODE_LABEL_NUMBER (label
);
23363 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23365 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23367 this_insn
= next_nonnote_insn (this_insn
);
23368 gcc_assert (!this_insn
23369 || (!BARRIER_P (this_insn
)
23370 && !LABEL_P (this_insn
)));
23374 /* Oh, dear! we ran off the end.. give up. */
23375 extract_constrain_insn_cached (insn
);
23376 arm_ccfsm_state
= 0;
23377 arm_target_insn
= NULL
;
23380 arm_target_insn
= this_insn
;
23383 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23386 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23388 if (reverse
|| then_not_else
)
23389 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23392 /* Restore recog_data (getting the attributes of other insns can
23393 destroy this array, but final.c assumes that it remains intact
23394 across this call. */
23395 extract_constrain_insn_cached (insn
);
23399 /* Output IT instructions. */
23401 thumb2_asm_output_opcode (FILE * stream
)
23406 if (arm_condexec_mask
)
23408 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23409 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23411 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23412 arm_condition_codes
[arm_current_cc
]);
23413 arm_condexec_mask
= 0;
23417 /* Returns true if REGNO is a valid register
23418 for holding a quantity of type MODE. */
23420 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23422 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23423 return (regno
== CC_REGNUM
23424 || (TARGET_HARD_FLOAT
&& TARGET_VFP
23425 && regno
== VFPCC_REGNUM
));
23427 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23431 /* For the Thumb we only allow values bigger than SImode in
23432 registers 0 - 6, so that there is always a second low
23433 register available to hold the upper part of the value.
23434 We probably we ought to ensure that the register is the
23435 start of an even numbered register pair. */
23436 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23438 if (TARGET_HARD_FLOAT
&& TARGET_VFP
23439 && IS_VFP_REGNUM (regno
))
23441 if (mode
== SFmode
|| mode
== SImode
)
23442 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23444 if (mode
== DFmode
)
23445 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23447 if (mode
== HFmode
)
23448 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23451 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23452 || (VALID_NEON_QREG_MODE (mode
)
23453 && NEON_REGNO_OK_FOR_QUAD (regno
))
23454 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23455 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23456 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23457 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23458 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23463 if (TARGET_REALLY_IWMMXT
)
23465 if (IS_IWMMXT_GR_REGNUM (regno
))
23466 return mode
== SImode
;
23468 if (IS_IWMMXT_REGNUM (regno
))
23469 return VALID_IWMMXT_REG_MODE (mode
);
23472 /* We allow almost any value to be stored in the general registers.
23473 Restrict doubleword quantities to even register pairs in ARM state
23474 so that we can use ldrd. Do not allow very large Neon structure
23475 opaque modes in general registers; they would use too many. */
23476 if (regno
<= LAST_ARM_REGNUM
)
23478 if (ARM_NUM_REGS (mode
) > 4)
23484 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23487 if (regno
== FRAME_POINTER_REGNUM
23488 || regno
== ARG_POINTER_REGNUM
)
23489 /* We only allow integers in the fake hard registers. */
23490 return GET_MODE_CLASS (mode
) == MODE_INT
;
23495 /* Implement MODES_TIEABLE_P. */
23498 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23500 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23503 /* We specifically want to allow elements of "structure" modes to
23504 be tieable to the structure. This more general condition allows
23505 other rarer situations too. */
23507 && (VALID_NEON_DREG_MODE (mode1
)
23508 || VALID_NEON_QREG_MODE (mode1
)
23509 || VALID_NEON_STRUCT_MODE (mode1
))
23510 && (VALID_NEON_DREG_MODE (mode2
)
23511 || VALID_NEON_QREG_MODE (mode2
)
23512 || VALID_NEON_STRUCT_MODE (mode2
)))
23518 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23519 not used in arm mode. */
23522 arm_regno_class (int regno
)
23524 if (regno
== PC_REGNUM
)
23529 if (regno
== STACK_POINTER_REGNUM
)
23531 if (regno
== CC_REGNUM
)
23538 if (TARGET_THUMB2
&& regno
< 8)
23541 if ( regno
<= LAST_ARM_REGNUM
23542 || regno
== FRAME_POINTER_REGNUM
23543 || regno
== ARG_POINTER_REGNUM
)
23544 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23546 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23547 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23549 if (IS_VFP_REGNUM (regno
))
23551 if (regno
<= D7_VFP_REGNUM
)
23552 return VFP_D0_D7_REGS
;
23553 else if (regno
<= LAST_LO_VFP_REGNUM
)
23554 return VFP_LO_REGS
;
23556 return VFP_HI_REGS
;
23559 if (IS_IWMMXT_REGNUM (regno
))
23560 return IWMMXT_REGS
;
23562 if (IS_IWMMXT_GR_REGNUM (regno
))
23563 return IWMMXT_GR_REGS
;
23568 /* Handle a special case when computing the offset
23569 of an argument from the frame pointer. */
23571 arm_debugger_arg_offset (int value
, rtx addr
)
23575 /* We are only interested if dbxout_parms() failed to compute the offset. */
23579 /* We can only cope with the case where the address is held in a register. */
23583 /* If we are using the frame pointer to point at the argument, then
23584 an offset of 0 is correct. */
23585 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23588 /* If we are using the stack pointer to point at the
23589 argument, then an offset of 0 is correct. */
23590 /* ??? Check this is consistent with thumb2 frame layout. */
23591 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23592 && REGNO (addr
) == SP_REGNUM
)
23595 /* Oh dear. The argument is pointed to by a register rather
23596 than being held in a register, or being stored at a known
23597 offset from the frame pointer. Since GDB only understands
23598 those two kinds of argument we must translate the address
23599 held in the register into an offset from the frame pointer.
23600 We do this by searching through the insns for the function
23601 looking to see where this register gets its value. If the
23602 register is initialized from the frame pointer plus an offset
23603 then we are in luck and we can continue, otherwise we give up.
23605 This code is exercised by producing debugging information
23606 for a function with arguments like this:
23608 double func (double a, double b, int c, double d) {return d;}
23610 Without this code the stab for parameter 'd' will be set to
23611 an offset of 0 from the frame pointer, rather than 8. */
23613 /* The if() statement says:
23615 If the insn is a normal instruction
23616 and if the insn is setting the value in a register
23617 and if the register being set is the register holding the address of the argument
23618 and if the address is computing by an addition
23619 that involves adding to a register
23620 which is the frame pointer
23625 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23627 if ( NONJUMP_INSN_P (insn
)
23628 && GET_CODE (PATTERN (insn
)) == SET
23629 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23630 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23631 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23632 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23633 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23636 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23645 warning (0, "unable to compute real location of stacked parameter");
23646 value
= 8; /* XXX magic hack */
23652 /* Implement TARGET_PROMOTED_TYPE. */
23655 arm_promoted_type (const_tree t
)
23657 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23658 return float_type_node
;
23662 /* Implement TARGET_CONVERT_TO_TYPE.
23663 Specifically, this hook implements the peculiarity of the ARM
23664 half-precision floating-point C semantics that requires conversions between
23665 __fp16 to or from double to do an intermediate conversion to float. */
23668 arm_convert_to_type (tree type
, tree expr
)
23670 tree fromtype
= TREE_TYPE (expr
);
23671 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
23673 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
23674 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
23675 return convert (type
, convert (float_type_node
, expr
));
23679 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23680 This simply adds HFmode as a supported mode; even though we don't
23681 implement arithmetic on this type directly, it's supported by
23682 optabs conversions, much the way the double-word arithmetic is
23683 special-cased in the default hook. */
23686 arm_scalar_mode_supported_p (machine_mode mode
)
23688 if (mode
== HFmode
)
23689 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23690 else if (ALL_FIXED_POINT_MODE_P (mode
))
23693 return default_scalar_mode_supported_p (mode
);
23696 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23697 not to early-clobber SRC registers in the process.
23699 We assume that the operands described by SRC and DEST represent a
23700 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23701 number of components into which the copy has been decomposed. */
23703 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23707 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23708 || REGNO (operands
[0]) < REGNO (operands
[1]))
23710 for (i
= 0; i
< count
; i
++)
23712 operands
[2 * i
] = dest
[i
];
23713 operands
[2 * i
+ 1] = src
[i
];
23718 for (i
= 0; i
< count
; i
++)
23720 operands
[2 * i
] = dest
[count
- i
- 1];
23721 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23726 /* Split operands into moves from op[1] + op[2] into op[0]. */
23729 neon_split_vcombine (rtx operands
[3])
23731 unsigned int dest
= REGNO (operands
[0]);
23732 unsigned int src1
= REGNO (operands
[1]);
23733 unsigned int src2
= REGNO (operands
[2]);
23734 machine_mode halfmode
= GET_MODE (operands
[1]);
23735 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
23736 rtx destlo
, desthi
;
23738 if (src1
== dest
&& src2
== dest
+ halfregs
)
23740 /* No-op move. Can't split to nothing; emit something. */
23741 emit_note (NOTE_INSN_DELETED
);
23745 /* Preserve register attributes for variable tracking. */
23746 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23747 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23748 GET_MODE_SIZE (halfmode
));
23750 /* Special case of reversed high/low parts. Use VSWP. */
23751 if (src2
== dest
&& src1
== dest
+ halfregs
)
23753 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23754 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23755 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23759 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23761 /* Try to avoid unnecessary moves if part of the result
23762 is in the right place already. */
23764 emit_move_insn (destlo
, operands
[1]);
23765 if (src2
!= dest
+ halfregs
)
23766 emit_move_insn (desthi
, operands
[2]);
23770 if (src2
!= dest
+ halfregs
)
23771 emit_move_insn (desthi
, operands
[2]);
23773 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
23786 /* Like emit_multi_reg_push, but allowing for a different set of
23787 registers to be described as saved. MASK is the set of registers
23788 to be saved; REAL_REGS is the set of registers to be described as
23789 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23792 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23794 unsigned long regno
;
23795 rtx par
[10], tmp
, reg
;
23799 /* Build the parallel of the registers actually being stored. */
23800 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23802 regno
= ctz_hwi (mask
);
23803 reg
= gen_rtx_REG (SImode
, regno
);
23806 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23808 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23813 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23814 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23815 tmp
= gen_frame_mem (BLKmode
, tmp
);
23816 tmp
= gen_rtx_SET (tmp
, par
[0]);
23819 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23820 insn
= emit_insn (tmp
);
23822 /* Always build the stack adjustment note for unwind info. */
23823 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23824 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23827 /* Build the parallel of the registers recorded as saved for unwind. */
23828 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23830 regno
= ctz_hwi (real_regs
);
23831 reg
= gen_rtx_REG (SImode
, regno
);
23833 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23834 tmp
= gen_frame_mem (SImode
, tmp
);
23835 tmp
= gen_rtx_SET (tmp
, reg
);
23836 RTX_FRAME_RELATED_P (tmp
) = 1;
23844 RTX_FRAME_RELATED_P (par
[0]) = 1;
23845 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23848 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23853 /* Emit code to push or pop registers to or from the stack. F is the
23854 assembly file. MASK is the registers to pop. */
23856 thumb_pop (FILE *f
, unsigned long mask
)
23859 int lo_mask
= mask
& 0xFF;
23860 int pushed_words
= 0;
23864 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23866 /* Special case. Do not generate a POP PC statement here, do it in
23868 thumb_exit (f
, -1);
23872 fprintf (f
, "\tpop\t{");
23874 /* Look at the low registers first. */
23875 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23879 asm_fprintf (f
, "%r", regno
);
23881 if ((lo_mask
& ~1) != 0)
23888 if (mask
& (1 << PC_REGNUM
))
23890 /* Catch popping the PC. */
23891 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
23892 || crtl
->calls_eh_return
)
23894 /* The PC is never poped directly, instead
23895 it is popped into r3 and then BX is used. */
23896 fprintf (f
, "}\n");
23898 thumb_exit (f
, -1);
23907 asm_fprintf (f
, "%r", PC_REGNUM
);
23911 fprintf (f
, "}\n");
23914 /* Generate code to return from a thumb function.
23915 If 'reg_containing_return_addr' is -1, then the return address is
23916 actually on the stack, at the stack pointer. */
23918 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23920 unsigned regs_available_for_popping
;
23921 unsigned regs_to_pop
;
23923 unsigned available
;
23927 int restore_a4
= FALSE
;
23929 /* Compute the registers we need to pop. */
23933 if (reg_containing_return_addr
== -1)
23935 regs_to_pop
|= 1 << LR_REGNUM
;
23939 if (TARGET_BACKTRACE
)
23941 /* Restore the (ARM) frame pointer and stack pointer. */
23942 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23946 /* If there is nothing to pop then just emit the BX instruction and
23948 if (pops_needed
== 0)
23950 if (crtl
->calls_eh_return
)
23951 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23953 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23956 /* Otherwise if we are not supporting interworking and we have not created
23957 a backtrace structure and the function was not entered in ARM mode then
23958 just pop the return address straight into the PC. */
23959 else if (!TARGET_INTERWORK
23960 && !TARGET_BACKTRACE
23961 && !is_called_in_ARM_mode (current_function_decl
)
23962 && !crtl
->calls_eh_return
)
23964 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23968 /* Find out how many of the (return) argument registers we can corrupt. */
23969 regs_available_for_popping
= 0;
23971 /* If returning via __builtin_eh_return, the bottom three registers
23972 all contain information needed for the return. */
23973 if (crtl
->calls_eh_return
)
23977 /* If we can deduce the registers used from the function's
23978 return value. This is more reliable that examining
23979 df_regs_ever_live_p () because that will be set if the register is
23980 ever used in the function, not just if the register is used
23981 to hold a return value. */
23983 if (crtl
->return_rtx
!= 0)
23984 mode
= GET_MODE (crtl
->return_rtx
);
23986 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23988 size
= GET_MODE_SIZE (mode
);
23992 /* In a void function we can use any argument register.
23993 In a function that returns a structure on the stack
23994 we can use the second and third argument registers. */
23995 if (mode
== VOIDmode
)
23996 regs_available_for_popping
=
23997 (1 << ARG_REGISTER (1))
23998 | (1 << ARG_REGISTER (2))
23999 | (1 << ARG_REGISTER (3));
24001 regs_available_for_popping
=
24002 (1 << ARG_REGISTER (2))
24003 | (1 << ARG_REGISTER (3));
24005 else if (size
<= 4)
24006 regs_available_for_popping
=
24007 (1 << ARG_REGISTER (2))
24008 | (1 << ARG_REGISTER (3));
24009 else if (size
<= 8)
24010 regs_available_for_popping
=
24011 (1 << ARG_REGISTER (3));
24014 /* Match registers to be popped with registers into which we pop them. */
24015 for (available
= regs_available_for_popping
,
24016 required
= regs_to_pop
;
24017 required
!= 0 && available
!= 0;
24018 available
&= ~(available
& - available
),
24019 required
&= ~(required
& - required
))
24022 /* If we have any popping registers left over, remove them. */
24024 regs_available_for_popping
&= ~available
;
24026 /* Otherwise if we need another popping register we can use
24027 the fourth argument register. */
24028 else if (pops_needed
)
24030 /* If we have not found any free argument registers and
24031 reg a4 contains the return address, we must move it. */
24032 if (regs_available_for_popping
== 0
24033 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
24035 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24036 reg_containing_return_addr
= LR_REGNUM
;
24038 else if (size
> 12)
24040 /* Register a4 is being used to hold part of the return value,
24041 but we have dire need of a free, low register. */
24044 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
24047 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
24049 /* The fourth argument register is available. */
24050 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
24056 /* Pop as many registers as we can. */
24057 thumb_pop (f
, regs_available_for_popping
);
24059 /* Process the registers we popped. */
24060 if (reg_containing_return_addr
== -1)
24062 /* The return address was popped into the lowest numbered register. */
24063 regs_to_pop
&= ~(1 << LR_REGNUM
);
24065 reg_containing_return_addr
=
24066 number_of_first_bit_set (regs_available_for_popping
);
24068 /* Remove this register for the mask of available registers, so that
24069 the return address will not be corrupted by further pops. */
24070 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
24073 /* If we popped other registers then handle them here. */
24074 if (regs_available_for_popping
)
24078 /* Work out which register currently contains the frame pointer. */
24079 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24081 /* Move it into the correct place. */
24082 asm_fprintf (f
, "\tmov\t%r, %r\n",
24083 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
24085 /* (Temporarily) remove it from the mask of popped registers. */
24086 regs_available_for_popping
&= ~(1 << frame_pointer
);
24087 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24089 if (regs_available_for_popping
)
24093 /* We popped the stack pointer as well,
24094 find the register that contains it. */
24095 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24097 /* Move it into the stack register. */
24098 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24100 /* At this point we have popped all necessary registers, so
24101 do not worry about restoring regs_available_for_popping
24102 to its correct value:
24104 assert (pops_needed == 0)
24105 assert (regs_available_for_popping == (1 << frame_pointer))
24106 assert (regs_to_pop == (1 << STACK_POINTER)) */
24110 /* Since we have just move the popped value into the frame
24111 pointer, the popping register is available for reuse, and
24112 we know that we still have the stack pointer left to pop. */
24113 regs_available_for_popping
|= (1 << frame_pointer
);
24117 /* If we still have registers left on the stack, but we no longer have
24118 any registers into which we can pop them, then we must move the return
24119 address into the link register and make available the register that
24121 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24123 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24125 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24126 reg_containing_return_addr
);
24128 reg_containing_return_addr
= LR_REGNUM
;
24131 /* If we have registers left on the stack then pop some more.
24132 We know that at most we will want to pop FP and SP. */
24133 if (pops_needed
> 0)
24138 thumb_pop (f
, regs_available_for_popping
);
24140 /* We have popped either FP or SP.
24141 Move whichever one it is into the correct register. */
24142 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24143 move_to
= number_of_first_bit_set (regs_to_pop
);
24145 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24147 regs_to_pop
&= ~(1 << move_to
);
24152 /* If we still have not popped everything then we must have only
24153 had one register available to us and we are now popping the SP. */
24154 if (pops_needed
> 0)
24158 thumb_pop (f
, regs_available_for_popping
);
24160 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24162 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24164 assert (regs_to_pop == (1 << STACK_POINTER))
24165 assert (pops_needed == 1)
24169 /* If necessary restore the a4 register. */
24172 if (reg_containing_return_addr
!= LR_REGNUM
)
24174 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24175 reg_containing_return_addr
= LR_REGNUM
;
24178 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24181 if (crtl
->calls_eh_return
)
24182 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24184 /* Return to caller. */
24185 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24188 /* Scan INSN just before assembler is output for it.
24189 For Thumb-1, we track the status of the condition codes; this
24190 information is used in the cbranchsi4_insn pattern. */
24192 thumb1_final_prescan_insn (rtx_insn
*insn
)
24194 if (flag_print_asm_name
)
24195 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24196 INSN_ADDRESSES (INSN_UID (insn
)));
24197 /* Don't overwrite the previous setter when we get to a cbranch. */
24198 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24200 enum attr_conds conds
;
24202 if (cfun
->machine
->thumb1_cc_insn
)
24204 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24205 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24208 conds
= get_attr_conds (insn
);
24209 if (conds
== CONDS_SET
)
24211 rtx set
= single_set (insn
);
24212 cfun
->machine
->thumb1_cc_insn
= insn
;
24213 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24214 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24215 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24216 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24218 rtx src1
= XEXP (SET_SRC (set
), 1);
24219 if (src1
== const0_rtx
)
24220 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24222 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24224 /* Record the src register operand instead of dest because
24225 cprop_hardreg pass propagates src. */
24226 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24229 else if (conds
!= CONDS_NOCOND
)
24230 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24233 /* Check if unexpected far jump is used. */
24234 if (cfun
->machine
->lr_save_eliminated
24235 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24236 internal_error("Unexpected thumb1 far jump");
24240 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24242 unsigned HOST_WIDE_INT mask
= 0xff;
24245 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24246 if (val
== 0) /* XXX */
24249 for (i
= 0; i
< 25; i
++)
24250 if ((val
& (mask
<< i
)) == val
)
24256 /* Returns nonzero if the current function contains,
24257 or might contain a far jump. */
24259 thumb_far_jump_used_p (void)
24262 bool far_jump
= false;
24263 unsigned int func_size
= 0;
24265 /* This test is only important for leaf functions. */
24266 /* assert (!leaf_function_p ()); */
24268 /* If we have already decided that far jumps may be used,
24269 do not bother checking again, and always return true even if
24270 it turns out that they are not being used. Once we have made
24271 the decision that far jumps are present (and that hence the link
24272 register will be pushed onto the stack) we cannot go back on it. */
24273 if (cfun
->machine
->far_jump_used
)
24276 /* If this function is not being called from the prologue/epilogue
24277 generation code then it must be being called from the
24278 INITIAL_ELIMINATION_OFFSET macro. */
24279 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24281 /* In this case we know that we are being asked about the elimination
24282 of the arg pointer register. If that register is not being used,
24283 then there are no arguments on the stack, and we do not have to
24284 worry that a far jump might force the prologue to push the link
24285 register, changing the stack offsets. In this case we can just
24286 return false, since the presence of far jumps in the function will
24287 not affect stack offsets.
24289 If the arg pointer is live (or if it was live, but has now been
24290 eliminated and so set to dead) then we do have to test to see if
24291 the function might contain a far jump. This test can lead to some
24292 false negatives, since before reload is completed, then length of
24293 branch instructions is not known, so gcc defaults to returning their
24294 longest length, which in turn sets the far jump attribute to true.
24296 A false negative will not result in bad code being generated, but it
24297 will result in a needless push and pop of the link register. We
24298 hope that this does not occur too often.
24300 If we need doubleword stack alignment this could affect the other
24301 elimination offsets so we can't risk getting it wrong. */
24302 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24303 cfun
->machine
->arg_pointer_live
= 1;
24304 else if (!cfun
->machine
->arg_pointer_live
)
24308 /* We should not change far_jump_used during or after reload, as there is
24309 no chance to change stack frame layout. */
24310 if (reload_in_progress
|| reload_completed
)
24313 /* Check to see if the function contains a branch
24314 insn with the far jump attribute set. */
24315 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24317 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24321 func_size
+= get_attr_length (insn
);
24324 /* Attribute far_jump will always be true for thumb1 before
24325 shorten_branch pass. So checking far_jump attribute before
24326 shorten_branch isn't much useful.
24328 Following heuristic tries to estimate more accurately if a far jump
24329 may finally be used. The heuristic is very conservative as there is
24330 no chance to roll-back the decision of not to use far jump.
24332 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24333 2-byte insn is associated with a 4 byte constant pool. Using
24334 function size 2048/3 as the threshold is conservative enough. */
24337 if ((func_size
* 3) >= 2048)
24339 /* Record the fact that we have decided that
24340 the function does use far jumps. */
24341 cfun
->machine
->far_jump_used
= 1;
24349 /* Return nonzero if FUNC must be entered in ARM mode. */
24351 is_called_in_ARM_mode (tree func
)
24353 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24355 /* Ignore the problem about functions whose address is taken. */
24356 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24360 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24366 /* Given the stack offsets and register mask in OFFSETS, decide how
24367 many additional registers to push instead of subtracting a constant
24368 from SP. For epilogues the principle is the same except we use pop.
24369 FOR_PROLOGUE indicates which we're generating. */
24371 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24373 HOST_WIDE_INT amount
;
24374 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24375 /* Extract a mask of the ones we can give to the Thumb's push/pop
24377 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24378 /* Then count how many other high registers will need to be pushed. */
24379 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24380 int n_free
, reg_base
, size
;
24382 if (!for_prologue
&& frame_pointer_needed
)
24383 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24385 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24387 /* If the stack frame size is 512 exactly, we can save one load
24388 instruction, which should make this a win even when optimizing
24390 if (!optimize_size
&& amount
!= 512)
24393 /* Can't do this if there are high registers to push. */
24394 if (high_regs_pushed
!= 0)
24397 /* Shouldn't do it in the prologue if no registers would normally
24398 be pushed at all. In the epilogue, also allow it if we'll have
24399 a pop insn for the PC. */
24402 || TARGET_BACKTRACE
24403 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24404 || TARGET_INTERWORK
24405 || crtl
->args
.pretend_args_size
!= 0))
24408 /* Don't do this if thumb_expand_prologue wants to emit instructions
24409 between the push and the stack frame allocation. */
24411 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24412 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24419 size
= arm_size_return_regs ();
24420 reg_base
= ARM_NUM_INTS (size
);
24421 live_regs_mask
>>= reg_base
;
24424 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24425 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24427 live_regs_mask
>>= 1;
24433 gcc_assert (amount
/ 4 * 4 == amount
);
24435 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24436 return (amount
- 508) / 4;
24437 if (amount
<= n_free
* 4)
24442 /* The bits which aren't usefully expanded as rtl. */
24444 thumb1_unexpanded_epilogue (void)
24446 arm_stack_offsets
*offsets
;
24448 unsigned long live_regs_mask
= 0;
24449 int high_regs_pushed
= 0;
24451 int had_to_push_lr
;
24454 if (cfun
->machine
->return_used_this_function
!= 0)
24457 if (IS_NAKED (arm_current_func_type ()))
24460 offsets
= arm_get_frame_offsets ();
24461 live_regs_mask
= offsets
->saved_regs_mask
;
24462 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24464 /* If we can deduce the registers used from the function's return value.
24465 This is more reliable that examining df_regs_ever_live_p () because that
24466 will be set if the register is ever used in the function, not just if
24467 the register is used to hold a return value. */
24468 size
= arm_size_return_regs ();
24470 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24473 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24474 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24477 /* The prolog may have pushed some high registers to use as
24478 work registers. e.g. the testsuite file:
24479 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24480 compiles to produce:
24481 push {r4, r5, r6, r7, lr}
24485 as part of the prolog. We have to undo that pushing here. */
24487 if (high_regs_pushed
)
24489 unsigned long mask
= live_regs_mask
& 0xff;
24492 /* The available low registers depend on the size of the value we are
24500 /* Oh dear! We have no low registers into which we can pop
24503 ("no low registers available for popping high registers");
24505 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24506 if (live_regs_mask
& (1 << next_hi_reg
))
24509 while (high_regs_pushed
)
24511 /* Find lo register(s) into which the high register(s) can
24513 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24515 if (mask
& (1 << regno
))
24516 high_regs_pushed
--;
24517 if (high_regs_pushed
== 0)
24521 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24523 /* Pop the values into the low register(s). */
24524 thumb_pop (asm_out_file
, mask
);
24526 /* Move the value(s) into the high registers. */
24527 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24529 if (mask
& (1 << regno
))
24531 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24534 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24535 if (live_regs_mask
& (1 << next_hi_reg
))
24540 live_regs_mask
&= ~0x0f00;
24543 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24544 live_regs_mask
&= 0xff;
24546 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24548 /* Pop the return address into the PC. */
24549 if (had_to_push_lr
)
24550 live_regs_mask
|= 1 << PC_REGNUM
;
24552 /* Either no argument registers were pushed or a backtrace
24553 structure was created which includes an adjusted stack
24554 pointer, so just pop everything. */
24555 if (live_regs_mask
)
24556 thumb_pop (asm_out_file
, live_regs_mask
);
24558 /* We have either just popped the return address into the
24559 PC or it is was kept in LR for the entire function.
24560 Note that thumb_pop has already called thumb_exit if the
24561 PC was in the list. */
24562 if (!had_to_push_lr
)
24563 thumb_exit (asm_out_file
, LR_REGNUM
);
24567 /* Pop everything but the return address. */
24568 if (live_regs_mask
)
24569 thumb_pop (asm_out_file
, live_regs_mask
);
24571 if (had_to_push_lr
)
24575 /* We have no free low regs, so save one. */
24576 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24580 /* Get the return address into a temporary register. */
24581 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24585 /* Move the return address to lr. */
24586 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24588 /* Restore the low register. */
24589 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24594 regno
= LAST_ARG_REGNUM
;
24599 /* Remove the argument registers that were pushed onto the stack. */
24600 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24601 SP_REGNUM
, SP_REGNUM
,
24602 crtl
->args
.pretend_args_size
);
24604 thumb_exit (asm_out_file
, regno
);
24610 /* Functions to save and restore machine-specific function data. */
24611 static struct machine_function
*
24612 arm_init_machine_status (void)
24614 struct machine_function
*machine
;
24615 machine
= ggc_cleared_alloc
<machine_function
> ();
24617 #if ARM_FT_UNKNOWN != 0
24618 machine
->func_type
= ARM_FT_UNKNOWN
;
24623 /* Return an RTX indicating where the return address to the
24624 calling function can be found. */
24626 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24631 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24634 /* Do anything needed before RTL is emitted for each function. */
24636 arm_init_expanders (void)
24638 /* Arrange to initialize and mark the machine per-function status. */
24639 init_machine_status
= arm_init_machine_status
;
24641 /* This is to stop the combine pass optimizing away the alignment
24642 adjustment of va_arg. */
24643 /* ??? It is claimed that this should not be necessary. */
24645 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
24648 /* Check that FUNC is called with a different mode. */
24651 arm_change_mode_p (tree func
)
24653 if (TREE_CODE (func
) != FUNCTION_DECL
)
24656 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24659 callee_tree
= target_option_default_node
;
24661 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24662 int flags
= callee_opts
->x_target_flags
;
24664 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
24667 /* Like arm_compute_initial_elimination offset. Simpler because there
24668 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24669 to point at the base of the local variables after static stack
24670 space for a function has been allocated. */
24673 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24675 arm_stack_offsets
*offsets
;
24677 offsets
= arm_get_frame_offsets ();
24681 case ARG_POINTER_REGNUM
:
24684 case STACK_POINTER_REGNUM
:
24685 return offsets
->outgoing_args
- offsets
->saved_args
;
24687 case FRAME_POINTER_REGNUM
:
24688 return offsets
->soft_frame
- offsets
->saved_args
;
24690 case ARM_HARD_FRAME_POINTER_REGNUM
:
24691 return offsets
->saved_regs
- offsets
->saved_args
;
24693 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24694 return offsets
->locals_base
- offsets
->saved_args
;
24697 gcc_unreachable ();
24701 case FRAME_POINTER_REGNUM
:
24704 case STACK_POINTER_REGNUM
:
24705 return offsets
->outgoing_args
- offsets
->soft_frame
;
24707 case ARM_HARD_FRAME_POINTER_REGNUM
:
24708 return offsets
->saved_regs
- offsets
->soft_frame
;
24710 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24711 return offsets
->locals_base
- offsets
->soft_frame
;
24714 gcc_unreachable ();
24719 gcc_unreachable ();
24723 /* Generate the function's prologue. */
24726 thumb1_expand_prologue (void)
24730 HOST_WIDE_INT amount
;
24731 HOST_WIDE_INT size
;
24732 arm_stack_offsets
*offsets
;
24733 unsigned long func_type
;
24735 unsigned long live_regs_mask
;
24736 unsigned long l_mask
;
24737 unsigned high_regs_pushed
= 0;
24739 func_type
= arm_current_func_type ();
24741 /* Naked functions don't have prologues. */
24742 if (IS_NAKED (func_type
))
24744 if (flag_stack_usage_info
)
24745 current_function_static_stack_size
= 0;
24749 if (IS_INTERRUPT (func_type
))
24751 error ("interrupt Service Routines cannot be coded in Thumb mode");
24755 if (is_called_in_ARM_mode (current_function_decl
))
24756 emit_insn (gen_prologue_thumb1_interwork ());
24758 offsets
= arm_get_frame_offsets ();
24759 live_regs_mask
= offsets
->saved_regs_mask
;
24761 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24762 l_mask
= live_regs_mask
& 0x40ff;
24763 /* Then count how many other high registers will need to be pushed. */
24764 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24766 if (crtl
->args
.pretend_args_size
)
24768 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24770 if (cfun
->machine
->uses_anonymous_args
)
24772 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24773 unsigned long mask
;
24775 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24776 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24778 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24782 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24783 stack_pointer_rtx
, x
));
24785 RTX_FRAME_RELATED_P (insn
) = 1;
24788 if (TARGET_BACKTRACE
)
24790 HOST_WIDE_INT offset
= 0;
24791 unsigned work_register
;
24792 rtx work_reg
, x
, arm_hfp_rtx
;
24794 /* We have been asked to create a stack backtrace structure.
24795 The code looks like this:
24799 0 sub SP, #16 Reserve space for 4 registers.
24800 2 push {R7} Push low registers.
24801 4 add R7, SP, #20 Get the stack pointer before the push.
24802 6 str R7, [SP, #8] Store the stack pointer
24803 (before reserving the space).
24804 8 mov R7, PC Get hold of the start of this code + 12.
24805 10 str R7, [SP, #16] Store it.
24806 12 mov R7, FP Get hold of the current frame pointer.
24807 14 str R7, [SP, #4] Store it.
24808 16 mov R7, LR Get hold of the current return address.
24809 18 str R7, [SP, #12] Store it.
24810 20 add R7, SP, #16 Point at the start of the
24811 backtrace structure.
24812 22 mov FP, R7 Put this value into the frame pointer. */
24814 work_register
= thumb_find_work_register (live_regs_mask
);
24815 work_reg
= gen_rtx_REG (SImode
, work_register
);
24816 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24818 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24819 stack_pointer_rtx
, GEN_INT (-16)));
24820 RTX_FRAME_RELATED_P (insn
) = 1;
24824 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24825 RTX_FRAME_RELATED_P (insn
) = 1;
24827 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24830 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24831 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24833 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24834 x
= gen_frame_mem (SImode
, x
);
24835 emit_move_insn (x
, work_reg
);
24837 /* Make sure that the instruction fetching the PC is in the right place
24838 to calculate "start of backtrace creation code + 12". */
24839 /* ??? The stores using the common WORK_REG ought to be enough to
24840 prevent the scheduler from doing anything weird. Failing that
24841 we could always move all of the following into an UNSPEC_VOLATILE. */
24844 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24845 emit_move_insn (work_reg
, x
);
24847 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24848 x
= gen_frame_mem (SImode
, x
);
24849 emit_move_insn (x
, work_reg
);
24851 emit_move_insn (work_reg
, arm_hfp_rtx
);
24853 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24854 x
= gen_frame_mem (SImode
, x
);
24855 emit_move_insn (x
, work_reg
);
24859 emit_move_insn (work_reg
, arm_hfp_rtx
);
24861 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24862 x
= gen_frame_mem (SImode
, x
);
24863 emit_move_insn (x
, work_reg
);
24865 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24866 emit_move_insn (work_reg
, x
);
24868 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24869 x
= gen_frame_mem (SImode
, x
);
24870 emit_move_insn (x
, work_reg
);
24873 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24874 emit_move_insn (work_reg
, x
);
24876 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24877 x
= gen_frame_mem (SImode
, x
);
24878 emit_move_insn (x
, work_reg
);
24880 x
= GEN_INT (offset
+ 12);
24881 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24883 emit_move_insn (arm_hfp_rtx
, work_reg
);
24885 /* Optimization: If we are not pushing any low registers but we are going
24886 to push some high registers then delay our first push. This will just
24887 be a push of LR and we can combine it with the push of the first high
24889 else if ((l_mask
& 0xff) != 0
24890 || (high_regs_pushed
== 0 && l_mask
))
24892 unsigned long mask
= l_mask
;
24893 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24894 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24895 RTX_FRAME_RELATED_P (insn
) = 1;
24898 if (high_regs_pushed
)
24900 unsigned pushable_regs
;
24901 unsigned next_hi_reg
;
24902 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24903 : crtl
->args
.info
.nregs
;
24904 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24906 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24907 if (live_regs_mask
& (1 << next_hi_reg
))
24910 /* Here we need to mask out registers used for passing arguments
24911 even if they can be pushed. This is to avoid using them to stash the high
24912 registers. Such kind of stash may clobber the use of arguments. */
24913 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
24915 if (pushable_regs
== 0)
24916 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24918 while (high_regs_pushed
> 0)
24920 unsigned long real_regs_mask
= 0;
24922 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
24924 if (pushable_regs
& (1 << regno
))
24926 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24927 gen_rtx_REG (SImode
, next_hi_reg
));
24929 high_regs_pushed
--;
24930 real_regs_mask
|= (1 << next_hi_reg
);
24932 if (high_regs_pushed
)
24934 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24936 if (live_regs_mask
& (1 << next_hi_reg
))
24941 pushable_regs
&= ~((1 << regno
) - 1);
24947 /* If we had to find a work register and we have not yet
24948 saved the LR then add it to the list of regs to push. */
24949 if (l_mask
== (1 << LR_REGNUM
))
24951 pushable_regs
|= l_mask
;
24952 real_regs_mask
|= l_mask
;
24956 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
24957 RTX_FRAME_RELATED_P (insn
) = 1;
24961 /* Load the pic register before setting the frame pointer,
24962 so we can use r7 as a temporary work register. */
24963 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24964 arm_load_pic_register (live_regs_mask
);
24966 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24967 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24968 stack_pointer_rtx
);
24970 size
= offsets
->outgoing_args
- offsets
->saved_args
;
24971 if (flag_stack_usage_info
)
24972 current_function_static_stack_size
= size
;
24974 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24975 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
24976 sorry ("-fstack-check=specific for Thumb-1");
24978 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24979 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
24984 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24985 GEN_INT (- amount
)));
24986 RTX_FRAME_RELATED_P (insn
) = 1;
24992 /* The stack decrement is too big for an immediate value in a single
24993 insn. In theory we could issue multiple subtracts, but after
24994 three of them it becomes more space efficient to place the full
24995 value in the constant pool and load into a register. (Also the
24996 ARM debugger really likes to see only one stack decrement per
24997 function). So instead we look for a scratch register into which
24998 we can load the decrement, and then we subtract this from the
24999 stack pointer. Unfortunately on the thumb the only available
25000 scratch registers are the argument registers, and we cannot use
25001 these as they may hold arguments to the function. Instead we
25002 attempt to locate a call preserved register which is used by this
25003 function. If we can find one, then we know that it will have
25004 been pushed at the start of the prologue and so we can corrupt
25006 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
25007 if (live_regs_mask
& (1 << regno
))
25010 gcc_assert(regno
<= LAST_LO_REGNUM
);
25012 reg
= gen_rtx_REG (SImode
, regno
);
25014 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
25016 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25017 stack_pointer_rtx
, reg
));
25019 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
25020 plus_constant (Pmode
, stack_pointer_rtx
,
25022 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
25023 RTX_FRAME_RELATED_P (insn
) = 1;
25027 if (frame_pointer_needed
)
25028 thumb_set_frame_pointer (offsets
);
25030 /* If we are profiling, make sure no instructions are scheduled before
25031 the call to mcount. Similarly if the user has requested no
25032 scheduling in the prolog. Similarly if we want non-call exceptions
25033 using the EABI unwinder, to prevent faulting instructions from being
25034 swapped with a stack adjustment. */
25035 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
25036 || (arm_except_unwind_info (&global_options
) == UI_TARGET
25037 && cfun
->can_throw_non_call_exceptions
))
25038 emit_insn (gen_blockage ());
25040 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
25041 if (live_regs_mask
& 0xff)
25042 cfun
->machine
->lr_save_eliminated
= 0;
25045 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25046 POP instruction can be generated. LR should be replaced by PC. All
25047 the checks required are already done by USE_RETURN_INSN (). Hence,
25048 all we really need to check here is if single register is to be
25049 returned, or multiple register return. */
25051 thumb2_expand_return (bool simple_return
)
25054 unsigned long saved_regs_mask
;
25055 arm_stack_offsets
*offsets
;
25057 offsets
= arm_get_frame_offsets ();
25058 saved_regs_mask
= offsets
->saved_regs_mask
;
25060 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25061 if (saved_regs_mask
& (1 << i
))
25064 if (!simple_return
&& saved_regs_mask
)
25068 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25069 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25070 rtx addr
= gen_rtx_MEM (SImode
,
25071 gen_rtx_POST_INC (SImode
,
25072 stack_pointer_rtx
));
25073 set_mem_alias_set (addr
, get_frame_alias_set ());
25074 XVECEXP (par
, 0, 0) = ret_rtx
;
25075 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25076 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25077 emit_jump_insn (par
);
25081 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25082 saved_regs_mask
|= (1 << PC_REGNUM
);
25083 arm_emit_multi_reg_pop (saved_regs_mask
);
25088 emit_jump_insn (simple_return_rtx
);
25093 thumb1_expand_epilogue (void)
25095 HOST_WIDE_INT amount
;
25096 arm_stack_offsets
*offsets
;
25099 /* Naked functions don't have prologues. */
25100 if (IS_NAKED (arm_current_func_type ()))
25103 offsets
= arm_get_frame_offsets ();
25104 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25106 if (frame_pointer_needed
)
25108 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25109 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25111 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25113 gcc_assert (amount
>= 0);
25116 emit_insn (gen_blockage ());
25119 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25120 GEN_INT (amount
)));
25123 /* r3 is always free in the epilogue. */
25124 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25126 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25127 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25131 /* Emit a USE (stack_pointer_rtx), so that
25132 the stack adjustment will not be deleted. */
25133 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25135 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25136 emit_insn (gen_blockage ());
25138 /* Emit a clobber for each insn that will be restored in the epilogue,
25139 so that flow2 will get register lifetimes correct. */
25140 for (regno
= 0; regno
< 13; regno
++)
25141 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25142 emit_clobber (gen_rtx_REG (SImode
, regno
));
25144 if (! df_regs_ever_live_p (LR_REGNUM
))
25145 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25148 /* Epilogue code for APCS frame. */
25150 arm_expand_epilogue_apcs_frame (bool really_return
)
25152 unsigned long func_type
;
25153 unsigned long saved_regs_mask
;
25156 int floats_from_frame
= 0;
25157 arm_stack_offsets
*offsets
;
25159 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25160 func_type
= arm_current_func_type ();
25162 /* Get frame offsets for ARM. */
25163 offsets
= arm_get_frame_offsets ();
25164 saved_regs_mask
= offsets
->saved_regs_mask
;
25166 /* Find the offset of the floating-point save area in the frame. */
25168 = (offsets
->saved_args
25169 + arm_compute_static_chain_stack_bytes ()
25172 /* Compute how many core registers saved and how far away the floats are. */
25173 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25174 if (saved_regs_mask
& (1 << i
))
25177 floats_from_frame
+= 4;
25180 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
25183 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25185 /* The offset is from IP_REGNUM. */
25186 int saved_size
= arm_get_vfp_saved_size ();
25187 if (saved_size
> 0)
25190 floats_from_frame
+= saved_size
;
25191 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25192 hard_frame_pointer_rtx
,
25193 GEN_INT (-floats_from_frame
)));
25194 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25195 ip_rtx
, hard_frame_pointer_rtx
);
25198 /* Generate VFP register multi-pop. */
25199 start_reg
= FIRST_VFP_REGNUM
;
25201 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25202 /* Look for a case where a reg does not need restoring. */
25203 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25204 && (!df_regs_ever_live_p (i
+ 1)
25205 || call_used_regs
[i
+ 1]))
25207 if (start_reg
!= i
)
25208 arm_emit_vfp_multi_reg_pop (start_reg
,
25209 (i
- start_reg
) / 2,
25210 gen_rtx_REG (SImode
,
25215 /* Restore the remaining regs that we have discovered (or possibly
25216 even all of them, if the conditional in the for loop never
25218 if (start_reg
!= i
)
25219 arm_emit_vfp_multi_reg_pop (start_reg
,
25220 (i
- start_reg
) / 2,
25221 gen_rtx_REG (SImode
, IP_REGNUM
));
25226 /* The frame pointer is guaranteed to be non-double-word aligned, as
25227 it is set to double-word-aligned old_stack_pointer - 4. */
25229 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
25231 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25232 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25234 rtx addr
= gen_frame_mem (V2SImode
,
25235 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25237 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25238 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25239 gen_rtx_REG (V2SImode
, i
),
25245 /* saved_regs_mask should contain IP which contains old stack pointer
25246 at the time of activation creation. Since SP and IP are adjacent registers,
25247 we can restore the value directly into SP. */
25248 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25249 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25250 saved_regs_mask
|= (1 << SP_REGNUM
);
25252 /* There are two registers left in saved_regs_mask - LR and PC. We
25253 only need to restore LR (the return address), but to
25254 save time we can load it directly into PC, unless we need a
25255 special function exit sequence, or we are not really returning. */
25257 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25258 && !crtl
->calls_eh_return
)
25259 /* Delete LR from the register mask, so that LR on
25260 the stack is loaded into the PC in the register mask. */
25261 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25263 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25265 num_regs
= bit_count (saved_regs_mask
);
25266 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25269 emit_insn (gen_blockage ());
25270 /* Unwind the stack to just below the saved registers. */
25271 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25272 hard_frame_pointer_rtx
,
25273 GEN_INT (- 4 * num_regs
)));
25275 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25276 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25279 arm_emit_multi_reg_pop (saved_regs_mask
);
25281 if (IS_INTERRUPT (func_type
))
25283 /* Interrupt handlers will have pushed the
25284 IP onto the stack, so restore it now. */
25286 rtx addr
= gen_rtx_MEM (SImode
,
25287 gen_rtx_POST_INC (SImode
,
25288 stack_pointer_rtx
));
25289 set_mem_alias_set (addr
, get_frame_alias_set ());
25290 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25291 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25292 gen_rtx_REG (SImode
, IP_REGNUM
),
25296 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25299 if (crtl
->calls_eh_return
)
25300 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25302 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25304 if (IS_STACKALIGN (func_type
))
25305 /* Restore the original stack pointer. Before prologue, the stack was
25306 realigned and the original stack pointer saved in r0. For details,
25307 see comment in arm_expand_prologue. */
25308 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25310 emit_jump_insn (simple_return_rtx
);
25313 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25314 function is not a sibcall. */
25316 arm_expand_epilogue (bool really_return
)
25318 unsigned long func_type
;
25319 unsigned long saved_regs_mask
;
25323 arm_stack_offsets
*offsets
;
25325 func_type
= arm_current_func_type ();
25327 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25328 let output_return_instruction take care of instruction emission if any. */
25329 if (IS_NAKED (func_type
)
25330 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25333 emit_jump_insn (simple_return_rtx
);
25337 /* If we are throwing an exception, then we really must be doing a
25338 return, so we can't tail-call. */
25339 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
25341 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25343 arm_expand_epilogue_apcs_frame (really_return
);
25347 /* Get frame offsets for ARM. */
25348 offsets
= arm_get_frame_offsets ();
25349 saved_regs_mask
= offsets
->saved_regs_mask
;
25350 num_regs
= bit_count (saved_regs_mask
);
25352 if (frame_pointer_needed
)
25355 /* Restore stack pointer if necessary. */
25358 /* In ARM mode, frame pointer points to first saved register.
25359 Restore stack pointer to last saved register. */
25360 amount
= offsets
->frame
- offsets
->saved_regs
;
25362 /* Force out any pending memory operations that reference stacked data
25363 before stack de-allocation occurs. */
25364 emit_insn (gen_blockage ());
25365 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25366 hard_frame_pointer_rtx
,
25367 GEN_INT (amount
)));
25368 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25370 hard_frame_pointer_rtx
);
25372 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25374 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25378 /* In Thumb-2 mode, the frame pointer points to the last saved
25380 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25383 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25384 hard_frame_pointer_rtx
,
25385 GEN_INT (amount
)));
25386 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25387 hard_frame_pointer_rtx
,
25388 hard_frame_pointer_rtx
);
25391 /* Force out any pending memory operations that reference stacked data
25392 before stack de-allocation occurs. */
25393 emit_insn (gen_blockage ());
25394 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25395 hard_frame_pointer_rtx
));
25396 arm_add_cfa_adjust_cfa_note (insn
, 0,
25398 hard_frame_pointer_rtx
);
25399 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25401 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25406 /* Pop off outgoing args and local frame to adjust stack pointer to
25407 last saved register. */
25408 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25412 /* Force out any pending memory operations that reference stacked data
25413 before stack de-allocation occurs. */
25414 emit_insn (gen_blockage ());
25415 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25417 GEN_INT (amount
)));
25418 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25419 stack_pointer_rtx
, stack_pointer_rtx
);
25420 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25422 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25426 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
25428 /* Generate VFP register multi-pop. */
25429 int end_reg
= LAST_VFP_REGNUM
+ 1;
25431 /* Scan the registers in reverse order. We need to match
25432 any groupings made in the prologue and generate matching
25433 vldm operations. The need to match groups is because,
25434 unlike pop, vldm can only do consecutive regs. */
25435 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25436 /* Look for a case where a reg does not need restoring. */
25437 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25438 && (!df_regs_ever_live_p (i
+ 1)
25439 || call_used_regs
[i
+ 1]))
25441 /* Restore the regs discovered so far (from reg+2 to
25443 if (end_reg
> i
+ 2)
25444 arm_emit_vfp_multi_reg_pop (i
+ 2,
25445 (end_reg
- (i
+ 2)) / 2,
25446 stack_pointer_rtx
);
25450 /* Restore the remaining regs that we have discovered (or possibly
25451 even all of them, if the conditional in the for loop never
25453 if (end_reg
> i
+ 2)
25454 arm_emit_vfp_multi_reg_pop (i
+ 2,
25455 (end_reg
- (i
+ 2)) / 2,
25456 stack_pointer_rtx
);
25460 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25461 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25464 rtx addr
= gen_rtx_MEM (V2SImode
,
25465 gen_rtx_POST_INC (SImode
,
25466 stack_pointer_rtx
));
25467 set_mem_alias_set (addr
, get_frame_alias_set ());
25468 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25469 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25470 gen_rtx_REG (V2SImode
, i
),
25472 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25473 stack_pointer_rtx
, stack_pointer_rtx
);
25476 if (saved_regs_mask
)
25479 bool return_in_pc
= false;
25481 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25482 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25483 && !IS_STACKALIGN (func_type
)
25485 && crtl
->args
.pretend_args_size
== 0
25486 && saved_regs_mask
& (1 << LR_REGNUM
)
25487 && !crtl
->calls_eh_return
)
25489 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25490 saved_regs_mask
|= (1 << PC_REGNUM
);
25491 return_in_pc
= true;
25494 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25496 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25497 if (saved_regs_mask
& (1 << i
))
25499 rtx addr
= gen_rtx_MEM (SImode
,
25500 gen_rtx_POST_INC (SImode
,
25501 stack_pointer_rtx
));
25502 set_mem_alias_set (addr
, get_frame_alias_set ());
25504 if (i
== PC_REGNUM
)
25506 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25507 XVECEXP (insn
, 0, 0) = ret_rtx
;
25508 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25510 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25511 insn
= emit_jump_insn (insn
);
25515 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25517 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25518 gen_rtx_REG (SImode
, i
),
25520 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25522 stack_pointer_rtx
);
25529 && current_tune
->prefer_ldrd_strd
25530 && !optimize_function_for_size_p (cfun
))
25533 thumb2_emit_ldrd_pop (saved_regs_mask
);
25534 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25535 arm_emit_ldrd_pop (saved_regs_mask
);
25537 arm_emit_multi_reg_pop (saved_regs_mask
);
25540 arm_emit_multi_reg_pop (saved_regs_mask
);
25548 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25552 rtx dwarf
= NULL_RTX
;
25554 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25556 GEN_INT (amount
)));
25558 RTX_FRAME_RELATED_P (tmp
) = 1;
25560 if (cfun
->machine
->uses_anonymous_args
)
25562 /* Restore pretend args. Refer arm_expand_prologue on how to save
25563 pretend_args in stack. */
25564 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25565 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25566 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25567 if (saved_regs_mask
& (1 << i
))
25569 rtx reg
= gen_rtx_REG (SImode
, i
);
25570 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25573 REG_NOTES (tmp
) = dwarf
;
25575 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25576 stack_pointer_rtx
, stack_pointer_rtx
);
25579 if (!really_return
)
25582 if (crtl
->calls_eh_return
)
25583 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25585 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25587 if (IS_STACKALIGN (func_type
))
25588 /* Restore the original stack pointer. Before prologue, the stack was
25589 realigned and the original stack pointer saved in r0. For details,
25590 see comment in arm_expand_prologue. */
25591 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25593 emit_jump_insn (simple_return_rtx
);
25596 /* Implementation of insn prologue_thumb1_interwork. This is the first
25597 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25600 thumb1_output_interwork (void)
25603 FILE *f
= asm_out_file
;
25605 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25606 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25608 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25610 /* Generate code sequence to switch us into Thumb mode. */
25611 /* The .code 32 directive has already been emitted by
25612 ASM_DECLARE_FUNCTION_NAME. */
25613 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25614 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25616 /* Generate a label, so that the debugger will notice the
25617 change in instruction sets. This label is also used by
25618 the assembler to bypass the ARM code when this function
25619 is called from a Thumb encoded function elsewhere in the
25620 same file. Hence the definition of STUB_NAME here must
25621 agree with the definition in gas/config/tc-arm.c. */
25623 #define STUB_NAME ".real_start_of"
25625 fprintf (f
, "\t.code\t16\n");
25627 if (arm_dllexport_name_p (name
))
25628 name
= arm_strip_name_encoding (name
);
25630 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25631 fprintf (f
, "\t.thumb_func\n");
25632 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
25637 /* Handle the case of a double word load into a low register from
25638 a computed memory address. The computed address may involve a
25639 register which is overwritten by the load. */
25641 thumb_load_double_from_address (rtx
*operands
)
25649 gcc_assert (REG_P (operands
[0]));
25650 gcc_assert (MEM_P (operands
[1]));
25652 /* Get the memory address. */
25653 addr
= XEXP (operands
[1], 0);
25655 /* Work out how the memory address is computed. */
25656 switch (GET_CODE (addr
))
25659 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25661 if (REGNO (operands
[0]) == REGNO (addr
))
25663 output_asm_insn ("ldr\t%H0, %2", operands
);
25664 output_asm_insn ("ldr\t%0, %1", operands
);
25668 output_asm_insn ("ldr\t%0, %1", operands
);
25669 output_asm_insn ("ldr\t%H0, %2", operands
);
25674 /* Compute <address> + 4 for the high order load. */
25675 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25677 output_asm_insn ("ldr\t%0, %1", operands
);
25678 output_asm_insn ("ldr\t%H0, %2", operands
);
25682 arg1
= XEXP (addr
, 0);
25683 arg2
= XEXP (addr
, 1);
25685 if (CONSTANT_P (arg1
))
25686 base
= arg2
, offset
= arg1
;
25688 base
= arg1
, offset
= arg2
;
25690 gcc_assert (REG_P (base
));
25692 /* Catch the case of <address> = <reg> + <reg> */
25693 if (REG_P (offset
))
25695 int reg_offset
= REGNO (offset
);
25696 int reg_base
= REGNO (base
);
25697 int reg_dest
= REGNO (operands
[0]);
25699 /* Add the base and offset registers together into the
25700 higher destination register. */
25701 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25702 reg_dest
+ 1, reg_base
, reg_offset
);
25704 /* Load the lower destination register from the address in
25705 the higher destination register. */
25706 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25707 reg_dest
, reg_dest
+ 1);
25709 /* Load the higher destination register from its own address
25711 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25712 reg_dest
+ 1, reg_dest
+ 1);
25716 /* Compute <address> + 4 for the high order load. */
25717 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25719 /* If the computed address is held in the low order register
25720 then load the high order register first, otherwise always
25721 load the low order register first. */
25722 if (REGNO (operands
[0]) == REGNO (base
))
25724 output_asm_insn ("ldr\t%H0, %2", operands
);
25725 output_asm_insn ("ldr\t%0, %1", operands
);
25729 output_asm_insn ("ldr\t%0, %1", operands
);
25730 output_asm_insn ("ldr\t%H0, %2", operands
);
25736 /* With no registers to worry about we can just load the value
25738 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25740 output_asm_insn ("ldr\t%H0, %2", operands
);
25741 output_asm_insn ("ldr\t%0, %1", operands
);
25745 gcc_unreachable ();
25752 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
25757 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25758 std::swap (operands
[4], operands
[5]);
25760 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
25761 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
25765 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25766 std::swap (operands
[4], operands
[5]);
25767 if (REGNO (operands
[5]) > REGNO (operands
[6]))
25768 std::swap (operands
[5], operands
[6]);
25769 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25770 std::swap (operands
[4], operands
[5]);
25772 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
25773 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
25777 gcc_unreachable ();
25783 /* Output a call-via instruction for thumb state. */
25785 thumb_call_via_reg (rtx reg
)
25787 int regno
= REGNO (reg
);
25790 gcc_assert (regno
< LR_REGNUM
);
25792 /* If we are in the normal text section we can use a single instance
25793 per compilation unit. If we are doing function sections, then we need
25794 an entry per section, since we can't rely on reachability. */
25795 if (in_section
== text_section
)
25797 thumb_call_reg_needed
= 1;
25799 if (thumb_call_via_label
[regno
] == NULL
)
25800 thumb_call_via_label
[regno
] = gen_label_rtx ();
25801 labelp
= thumb_call_via_label
+ regno
;
25805 if (cfun
->machine
->call_via
[regno
] == NULL
)
25806 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
25807 labelp
= cfun
->machine
->call_via
+ regno
;
25810 output_asm_insn ("bl\t%a0", labelp
);
25814 /* Routines for generating rtl. */
25816 thumb_expand_movmemqi (rtx
*operands
)
25818 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
25819 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
25820 HOST_WIDE_INT len
= INTVAL (operands
[2]);
25821 HOST_WIDE_INT offset
= 0;
25825 emit_insn (gen_movmem12b (out
, in
, out
, in
));
25831 emit_insn (gen_movmem8b (out
, in
, out
, in
));
25837 rtx reg
= gen_reg_rtx (SImode
);
25838 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
25839 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
25846 rtx reg
= gen_reg_rtx (HImode
);
25847 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
25848 plus_constant (Pmode
, in
,
25850 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
25859 rtx reg
= gen_reg_rtx (QImode
);
25860 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
25861 plus_constant (Pmode
, in
,
25863 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
25870 thumb_reload_out_hi (rtx
*operands
)
25872 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
25875 /* Return the length of a function name prefix
25876 that starts with the character 'c'. */
25878 arm_get_strip_length (int c
)
25882 ARM_NAME_ENCODING_LENGTHS
25887 /* Return a pointer to a function's name with any
25888 and all prefix encodings stripped from it. */
25890 arm_strip_name_encoding (const char *name
)
25894 while ((skip
= arm_get_strip_length (* name
)))
25900 /* If there is a '*' anywhere in the name's prefix, then
25901 emit the stripped name verbatim, otherwise prepend an
25902 underscore if leading underscores are being used. */
25904 arm_asm_output_labelref (FILE *stream
, const char *name
)
25909 while ((skip
= arm_get_strip_length (* name
)))
25911 verbatim
|= (*name
== '*');
25916 fputs (name
, stream
);
25918 asm_fprintf (stream
, "%U%s", name
);
25921 /* This function is used to emit an EABI tag and its associated value.
25922 We emit the numerical value of the tag in case the assembler does not
25923 support textual tags. (Eg gas prior to 2.20). If requested we include
25924 the tag name in a comment so that anyone reading the assembler output
25925 will know which tag is being set.
25927 This function is not static because arm-c.c needs it too. */
25930 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
25932 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
25933 if (flag_verbose_asm
|| flag_debug_asm
)
25934 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
25935 asm_fprintf (asm_out_file
, "\n");
25938 /* This function is used to print CPU tuning information as comment
25939 in assembler file. Pointers are not printed for now. */
25942 arm_print_tune_info (void)
25944 asm_fprintf (asm_out_file
, "\t@.tune parameters\n");
25945 asm_fprintf (asm_out_file
, "\t\t@constant_limit:\t%d\n",
25946 current_tune
->constant_limit
);
25947 asm_fprintf (asm_out_file
, "\t\t@max_insns_skipped:\t%d\n",
25948 current_tune
->max_insns_skipped
);
25949 asm_fprintf (asm_out_file
, "\t\t@prefetch.num_slots:\t%d\n",
25950 current_tune
->prefetch
.num_slots
);
25951 asm_fprintf (asm_out_file
, "\t\t@prefetch.l1_cache_size:\t%d\n",
25952 current_tune
->prefetch
.l1_cache_size
);
25953 asm_fprintf (asm_out_file
, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25954 current_tune
->prefetch
.l1_cache_line_size
);
25955 asm_fprintf (asm_out_file
, "\t\t@prefer_constant_pool:\t%d\n",
25956 (int) current_tune
->prefer_constant_pool
);
25957 asm_fprintf (asm_out_file
, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25958 asm_fprintf (asm_out_file
, "\t\t\t\ts&p\tcost\n");
25959 asm_fprintf (asm_out_file
, "\t\t\t\t00\t%d\n",
25960 current_tune
->branch_cost (false, false));
25961 asm_fprintf (asm_out_file
, "\t\t\t\t01\t%d\n",
25962 current_tune
->branch_cost (false, true));
25963 asm_fprintf (asm_out_file
, "\t\t\t\t10\t%d\n",
25964 current_tune
->branch_cost (true, false));
25965 asm_fprintf (asm_out_file
, "\t\t\t\t11\t%d\n",
25966 current_tune
->branch_cost (true, true));
25967 asm_fprintf (asm_out_file
, "\t\t@prefer_ldrd_strd:\t%d\n",
25968 (int) current_tune
->prefer_ldrd_strd
);
25969 asm_fprintf (asm_out_file
, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25970 (int) current_tune
->logical_op_non_short_circuit_thumb
,
25971 (int) current_tune
->logical_op_non_short_circuit_arm
);
25972 asm_fprintf (asm_out_file
, "\t\t@prefer_neon_for_64bits:\t%d\n",
25973 (int) current_tune
->prefer_neon_for_64bits
);
25974 asm_fprintf (asm_out_file
,
25975 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25976 (int) current_tune
->disparage_flag_setting_t16_encodings
);
25977 asm_fprintf (asm_out_file
, "\t\t@string_ops_prefer_neon:\t%d\n",
25978 (int) current_tune
->string_ops_prefer_neon
);
25979 asm_fprintf (asm_out_file
, "\t\t@max_insns_inline_memset:\t%d\n",
25980 current_tune
->max_insns_inline_memset
);
25981 asm_fprintf (asm_out_file
, "\t\t@fusible_ops:\t%u\n",
25982 current_tune
->fusible_ops
);
25983 asm_fprintf (asm_out_file
, "\t\t@sched_autopref:\t%d\n",
25984 (int) current_tune
->sched_autopref
);
25988 arm_file_start (void)
25994 if (arm_selected_arch
)
25996 /* armv7ve doesn't support any extensions. */
25997 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
25999 /* Keep backward compatability for assemblers
26000 which don't support armv7ve. */
26001 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26002 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26003 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26004 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26005 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
26009 const char* pos
= strchr (arm_selected_arch
->name
, '+');
26013 gcc_assert (strlen (arm_selected_arch
->name
)
26014 <= sizeof (buf
) / sizeof (*pos
));
26015 strncpy (buf
, arm_selected_arch
->name
,
26016 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
26017 buf
[pos
- arm_selected_arch
->name
] = '\0';
26018 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
26019 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
26022 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
26025 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
26026 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
26029 const char* truncated_name
26030 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
26031 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26034 if (print_tune_info
)
26035 arm_print_tune_info ();
26037 if (! TARGET_SOFT_FLOAT
&& TARGET_VFP
)
26039 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26040 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26042 if (TARGET_HARD_FLOAT_ABI
)
26043 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26046 /* Some of these attributes only apply when the corresponding features
26047 are used. However we don't have any easy way of figuring this out.
26048 Conservatively record the setting that would have been used. */
26050 if (flag_rounding_math
)
26051 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26053 if (!flag_unsafe_math_optimizations
)
26055 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26056 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26058 if (flag_signaling_nans
)
26059 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26061 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26062 flag_finite_math_only
? 1 : 3);
26064 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26065 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26066 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26067 flag_short_enums
? 1 : 2);
26069 /* Tag_ABI_optimization_goals. */
26072 else if (optimize
>= 2)
26078 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26080 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26083 if (arm_fp16_format
)
26084 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26085 (int) arm_fp16_format
);
26087 if (arm_lang_output_object_attributes_hook
)
26088 arm_lang_output_object_attributes_hook();
26091 default_file_start ();
26095 arm_file_end (void)
26099 if (NEED_INDICATE_EXEC_STACK
)
26100 /* Add .note.GNU-stack. */
26101 file_end_indicate_exec_stack ();
26103 if (! thumb_call_reg_needed
)
26106 switch_to_section (text_section
);
26107 asm_fprintf (asm_out_file
, "\t.code 16\n");
26108 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26110 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26112 rtx label
= thumb_call_via_label
[regno
];
26116 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26117 CODE_LABEL_NUMBER (label
));
26118 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
26124 /* Symbols in the text segment can be accessed without indirecting via the
26125 constant pool; it may take an extra binary operation, but this is still
26126 faster than indirecting via memory. Don't do this when not optimizing,
26127 since we won't be calculating al of the offsets necessary to do this
26131 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
26133 if (optimize
> 0 && TREE_CONSTANT (decl
))
26134 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26136 default_encode_section_info (decl
, rtl
, first
);
26138 #endif /* !ARM_PE */
26141 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26143 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26144 && !strcmp (prefix
, "L"))
26146 arm_ccfsm_state
= 0;
26147 arm_target_insn
= NULL
;
26149 default_internal_label (stream
, prefix
, labelno
);
26152 /* Output code to add DELTA to the first argument, and then jump
26153 to FUNCTION. Used for C++ multiple inheritance. */
26156 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26157 HOST_WIDE_INT
, tree function
)
26159 static int thunk_label
= 0;
26162 int mi_delta
= delta
;
26163 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26165 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26168 mi_delta
= - mi_delta
;
26170 final_start_function (emit_barrier (), file
, 1);
26174 int labelno
= thunk_label
++;
26175 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26176 /* Thunks are entered in arm mode when avaiable. */
26177 if (TARGET_THUMB1_ONLY
)
26179 /* push r3 so we can use it as a temporary. */
26180 /* TODO: Omit this save if r3 is not used. */
26181 fputs ("\tpush {r3}\n", file
);
26182 fputs ("\tldr\tr3, ", file
);
26186 fputs ("\tldr\tr12, ", file
);
26188 assemble_name (file
, label
);
26189 fputc ('\n', file
);
26192 /* If we are generating PIC, the ldr instruction below loads
26193 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26194 the address of the add + 8, so we have:
26196 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26199 Note that we have "+ 1" because some versions of GNU ld
26200 don't set the low bit of the result for R_ARM_REL32
26201 relocations against thumb function symbols.
26202 On ARMv6M this is +4, not +8. */
26203 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26204 assemble_name (file
, labelpc
);
26205 fputs (":\n", file
);
26206 if (TARGET_THUMB1_ONLY
)
26208 /* This is 2 insns after the start of the thunk, so we know it
26209 is 4-byte aligned. */
26210 fputs ("\tadd\tr3, pc, r3\n", file
);
26211 fputs ("\tmov r12, r3\n", file
);
26214 fputs ("\tadd\tr12, pc, r12\n", file
);
26216 else if (TARGET_THUMB1_ONLY
)
26217 fputs ("\tmov r12, r3\n", file
);
26219 if (TARGET_THUMB1_ONLY
)
26221 if (mi_delta
> 255)
26223 fputs ("\tldr\tr3, ", file
);
26224 assemble_name (file
, label
);
26225 fputs ("+4\n", file
);
26226 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26227 mi_op
, this_regno
, this_regno
);
26229 else if (mi_delta
!= 0)
26231 /* Thumb1 unified syntax requires s suffix in instruction name when
26232 one of the operands is immediate. */
26233 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26234 mi_op
, this_regno
, this_regno
,
26240 /* TODO: Use movw/movt for large constants when available. */
26241 while (mi_delta
!= 0)
26243 if ((mi_delta
& (3 << shift
)) == 0)
26247 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26248 mi_op
, this_regno
, this_regno
,
26249 mi_delta
& (0xff << shift
));
26250 mi_delta
&= ~(0xff << shift
);
26257 if (TARGET_THUMB1_ONLY
)
26258 fputs ("\tpop\t{r3}\n", file
);
26260 fprintf (file
, "\tbx\tr12\n");
26261 ASM_OUTPUT_ALIGN (file
, 2);
26262 assemble_name (file
, label
);
26263 fputs (":\n", file
);
26266 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26267 rtx tem
= XEXP (DECL_RTL (function
), 0);
26268 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26269 pipeline offset is four rather than eight. Adjust the offset
26271 tem
= plus_constant (GET_MODE (tem
), tem
,
26272 TARGET_THUMB1_ONLY
? -3 : -7);
26273 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26275 gen_rtx_SYMBOL_REF (Pmode
,
26276 ggc_strdup (labelpc
)));
26277 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26280 /* Output ".word .LTHUNKn". */
26281 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26283 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26284 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26288 fputs ("\tb\t", file
);
26289 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26290 if (NEED_PLT_RELOC
)
26291 fputs ("(PLT)", file
);
26292 fputc ('\n', file
);
26295 final_end_function ();
26298 /* MI thunk handling for TARGET_32BIT. */
26301 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26302 HOST_WIDE_INT vcall_offset
, tree function
)
26304 /* On ARM, this_regno is R0 or R1 depending on
26305 whether the function returns an aggregate or not.
26307 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26309 ? R1_REGNUM
: R0_REGNUM
);
26311 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26312 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26313 reload_completed
= 1;
26314 emit_note (NOTE_INSN_PROLOGUE_END
);
26316 /* Add DELTA to THIS_RTX. */
26318 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26319 delta
, this_rtx
, this_rtx
, false);
26321 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26322 if (vcall_offset
!= 0)
26324 /* Load *THIS_RTX. */
26325 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26326 /* Compute *THIS_RTX + VCALL_OFFSET. */
26327 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26329 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26330 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26331 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26334 /* Generate a tail call to the target function. */
26335 if (!TREE_USED (function
))
26337 assemble_external (function
);
26338 TREE_USED (function
) = 1;
26340 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26341 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26342 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26343 SIBLING_CALL_P (insn
) = 1;
26345 insn
= get_insns ();
26346 shorten_branches (insn
);
26347 final_start_function (insn
, file
, 1);
26348 final (insn
, file
, 1);
26349 final_end_function ();
26351 /* Stop pretending this is a post-reload pass. */
26352 reload_completed
= 0;
26355 /* Output code to add DELTA to the first argument, and then jump
26356 to FUNCTION. Used for C++ multiple inheritance. */
26359 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
26360 HOST_WIDE_INT vcall_offset
, tree function
)
26363 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26365 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26369 arm_emit_vector_const (FILE *file
, rtx x
)
26372 const char * pattern
;
26374 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26376 switch (GET_MODE (x
))
26378 case V2SImode
: pattern
= "%08x"; break;
26379 case V4HImode
: pattern
= "%04x"; break;
26380 case V8QImode
: pattern
= "%02x"; break;
26381 default: gcc_unreachable ();
26384 fprintf (file
, "0x");
26385 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26389 element
= CONST_VECTOR_ELT (x
, i
);
26390 fprintf (file
, pattern
, INTVAL (element
));
26396 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26397 HFmode constant pool entries are actually loaded with ldr. */
26399 arm_emit_fp16_const (rtx c
)
26403 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
26404 if (WORDS_BIG_ENDIAN
)
26405 assemble_zeros (2);
26406 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
26407 if (!WORDS_BIG_ENDIAN
)
26408 assemble_zeros (2);
26412 arm_output_load_gr (rtx
*operands
)
26419 if (!MEM_P (operands
[1])
26420 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26421 || !REG_P (reg
= XEXP (sum
, 0))
26422 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26423 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26424 return "wldrw%?\t%0, %1";
26426 /* Fix up an out-of-range load of a GR register. */
26427 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26428 wcgr
= operands
[0];
26430 output_asm_insn ("ldr%?\t%0, %1", operands
);
26432 operands
[0] = wcgr
;
26434 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26435 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26440 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26442 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26443 named arg and all anonymous args onto the stack.
26444 XXX I know the prologue shouldn't be pushing registers, but it is faster
26448 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26452 int second_time ATTRIBUTE_UNUSED
)
26454 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26457 cfun
->machine
->uses_anonymous_args
= 1;
26458 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26460 nregs
= pcum
->aapcs_ncrn
;
26461 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
26465 nregs
= pcum
->nregs
;
26467 if (nregs
< NUM_ARG_REGS
)
26468 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26471 /* We can't rely on the caller doing the proper promotion when
26472 using APCS or ATPCS. */
26475 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26477 return !TARGET_AAPCS_BASED
;
26480 static machine_mode
26481 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26483 int *punsignedp ATTRIBUTE_UNUSED
,
26484 const_tree fntype ATTRIBUTE_UNUSED
,
26485 int for_return ATTRIBUTE_UNUSED
)
26487 if (GET_MODE_CLASS (mode
) == MODE_INT
26488 && GET_MODE_SIZE (mode
) < 4)
26494 /* AAPCS based ABIs use short enums by default. */
26497 arm_default_short_enums (void)
26499 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
26503 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26506 arm_align_anon_bitfield (void)
26508 return TARGET_AAPCS_BASED
;
26512 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26515 arm_cxx_guard_type (void)
26517 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26521 /* The EABI says test the least significant bit of a guard variable. */
26524 arm_cxx_guard_mask_bit (void)
26526 return TARGET_AAPCS_BASED
;
26530 /* The EABI specifies that all array cookies are 8 bytes long. */
26533 arm_get_cookie_size (tree type
)
26537 if (!TARGET_AAPCS_BASED
)
26538 return default_cxx_get_cookie_size (type
);
26540 size
= build_int_cst (sizetype
, 8);
26545 /* The EABI says that array cookies should also contain the element size. */
26548 arm_cookie_has_size (void)
26550 return TARGET_AAPCS_BASED
;
26554 /* The EABI says constructors and destructors should return a pointer to
26555 the object constructed/destroyed. */
26558 arm_cxx_cdtor_returns_this (void)
26560 return TARGET_AAPCS_BASED
;
26563 /* The EABI says that an inline function may never be the key
26567 arm_cxx_key_method_may_be_inline (void)
26569 return !TARGET_AAPCS_BASED
;
26573 arm_cxx_determine_class_data_visibility (tree decl
)
26575 if (!TARGET_AAPCS_BASED
26576 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26579 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26580 is exported. However, on systems without dynamic vague linkage,
26581 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26582 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26583 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26585 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26586 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26590 arm_cxx_class_data_always_comdat (void)
26592 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26593 vague linkage if the class has no key function. */
26594 return !TARGET_AAPCS_BASED
;
26598 /* The EABI says __aeabi_atexit should be used to register static
26602 arm_cxx_use_aeabi_atexit (void)
26604 return TARGET_AAPCS_BASED
;
26609 arm_set_return_address (rtx source
, rtx scratch
)
26611 arm_stack_offsets
*offsets
;
26612 HOST_WIDE_INT delta
;
26614 unsigned long saved_regs
;
26616 offsets
= arm_get_frame_offsets ();
26617 saved_regs
= offsets
->saved_regs_mask
;
26619 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26620 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26623 if (frame_pointer_needed
)
26624 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26627 /* LR will be the first saved register. */
26628 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26633 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26634 GEN_INT (delta
& ~4095)));
26639 addr
= stack_pointer_rtx
;
26641 addr
= plus_constant (Pmode
, addr
, delta
);
26643 /* The store needs to be marked as frame related in order to prevent
26644 DSE from deleting it as dead if it is based on fp. */
26645 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26646 RTX_FRAME_RELATED_P (insn
) = 1;
26647 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26653 thumb_set_return_address (rtx source
, rtx scratch
)
26655 arm_stack_offsets
*offsets
;
26656 HOST_WIDE_INT delta
;
26657 HOST_WIDE_INT limit
;
26660 unsigned long mask
;
26664 offsets
= arm_get_frame_offsets ();
26665 mask
= offsets
->saved_regs_mask
;
26666 if (mask
& (1 << LR_REGNUM
))
26669 /* Find the saved regs. */
26670 if (frame_pointer_needed
)
26672 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26673 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26679 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26682 /* Allow for the stack frame. */
26683 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26685 /* The link register is always the first saved register. */
26688 /* Construct the address. */
26689 addr
= gen_rtx_REG (SImode
, reg
);
26692 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26693 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26697 addr
= plus_constant (Pmode
, addr
, delta
);
26699 /* The store needs to be marked as frame related in order to prevent
26700 DSE from deleting it as dead if it is based on fp. */
26701 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26702 RTX_FRAME_RELATED_P (insn
) = 1;
26703 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26706 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26709 /* Implements target hook vector_mode_supported_p. */
26711 arm_vector_mode_supported_p (machine_mode mode
)
26713 /* Neon also supports V2SImode, etc. listed in the clause below. */
26714 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26715 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
26716 || mode
== V2DImode
|| mode
== V8HFmode
))
26719 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26720 && ((mode
== V2SImode
)
26721 || (mode
== V4HImode
)
26722 || (mode
== V8QImode
)))
26725 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26726 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26727 || mode
== V2HAmode
))
26733 /* Implements target hook array_mode_supported_p. */
26736 arm_array_mode_supported_p (machine_mode mode
,
26737 unsigned HOST_WIDE_INT nelems
)
26740 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26741 && (nelems
>= 2 && nelems
<= 4))
26747 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26748 registers when autovectorizing for Neon, at least until multiple vector
26749 widths are supported properly by the middle-end. */
26751 static machine_mode
26752 arm_preferred_simd_mode (machine_mode mode
)
26758 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26760 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26762 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26764 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26766 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26773 if (TARGET_REALLY_IWMMXT
)
26789 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26791 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26792 using r0-r4 for function arguments, r7 for the stack frame and don't have
26793 enough left over to do doubleword arithmetic. For Thumb-2 all the
26794 potentially problematic instructions accept high registers so this is not
26795 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26796 that require many low registers. */
26798 arm_class_likely_spilled_p (reg_class_t rclass
)
26800 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
26801 || rclass
== CC_REG
)
26807 /* Implements target hook small_register_classes_for_mode_p. */
26809 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
26811 return TARGET_THUMB1
;
26814 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26815 ARM insns and therefore guarantee that the shift count is modulo 256.
26816 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26817 guarantee no particular behavior for out-of-range counts. */
26819 static unsigned HOST_WIDE_INT
26820 arm_shift_truncation_mask (machine_mode mode
)
26822 return mode
== SImode
? 255 : 0;
26826 /* Map internal gcc register numbers to DWARF2 register numbers. */
26829 arm_dbx_register_number (unsigned int regno
)
26834 if (IS_VFP_REGNUM (regno
))
26836 /* See comment in arm_dwarf_register_span. */
26837 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26838 return 64 + regno
- FIRST_VFP_REGNUM
;
26840 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
26843 if (IS_IWMMXT_GR_REGNUM (regno
))
26844 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
26846 if (IS_IWMMXT_REGNUM (regno
))
26847 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
26849 return DWARF_FRAME_REGISTERS
;
26852 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26853 GCC models tham as 64 32-bit registers, so we need to describe this to
26854 the DWARF generation code. Other registers can use the default. */
26856 arm_dwarf_register_span (rtx rtl
)
26864 regno
= REGNO (rtl
);
26865 if (!IS_VFP_REGNUM (regno
))
26868 /* XXX FIXME: The EABI defines two VFP register ranges:
26869 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26871 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26872 corresponding D register. Until GDB supports this, we shall use the
26873 legacy encodings. We also use these encodings for D0-D15 for
26874 compatibility with older debuggers. */
26875 mode
= GET_MODE (rtl
);
26876 if (GET_MODE_SIZE (mode
) < 8)
26879 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26881 nregs
= GET_MODE_SIZE (mode
) / 4;
26882 for (i
= 0; i
< nregs
; i
+= 2)
26883 if (TARGET_BIG_END
)
26885 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26886 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
26890 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
26891 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26896 nregs
= GET_MODE_SIZE (mode
) / 8;
26897 for (i
= 0; i
< nregs
; i
++)
26898 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
26901 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
26904 #if ARM_UNWIND_INFO
26905 /* Emit unwind directives for a store-multiple instruction or stack pointer
26906 push during alignment.
26907 These should only ever be generated by the function prologue code, so
26908 expect them to have a particular form.
26909 The store-multiple instruction sometimes pushes pc as the last register,
26910 although it should not be tracked into unwind information, or for -Os
26911 sometimes pushes some dummy registers before first register that needs
26912 to be tracked in unwind information; such dummy registers are there just
26913 to avoid separate stack adjustment, and will not be restored in the
26917 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
26920 HOST_WIDE_INT offset
;
26921 HOST_WIDE_INT nregs
;
26925 unsigned padfirst
= 0, padlast
= 0;
26928 e
= XVECEXP (p
, 0, 0);
26929 gcc_assert (GET_CODE (e
) == SET
);
26931 /* First insn will adjust the stack pointer. */
26932 gcc_assert (GET_CODE (e
) == SET
26933 && REG_P (SET_DEST (e
))
26934 && REGNO (SET_DEST (e
)) == SP_REGNUM
26935 && GET_CODE (SET_SRC (e
)) == PLUS
);
26937 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
26938 nregs
= XVECLEN (p
, 0) - 1;
26939 gcc_assert (nregs
);
26941 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
26944 /* For -Os dummy registers can be pushed at the beginning to
26945 avoid separate stack pointer adjustment. */
26946 e
= XVECEXP (p
, 0, 1);
26947 e
= XEXP (SET_DEST (e
), 0);
26948 if (GET_CODE (e
) == PLUS
)
26949 padfirst
= INTVAL (XEXP (e
, 1));
26950 gcc_assert (padfirst
== 0 || optimize_size
);
26951 /* The function prologue may also push pc, but not annotate it as it is
26952 never restored. We turn this into a stack pointer adjustment. */
26953 e
= XVECEXP (p
, 0, nregs
);
26954 e
= XEXP (SET_DEST (e
), 0);
26955 if (GET_CODE (e
) == PLUS
)
26956 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
26958 padlast
= offset
- 4;
26959 gcc_assert (padlast
== 0 || padlast
== 4);
26961 fprintf (asm_out_file
, "\t.pad #4\n");
26963 fprintf (asm_out_file
, "\t.save {");
26965 else if (IS_VFP_REGNUM (reg
))
26968 fprintf (asm_out_file
, "\t.vsave {");
26971 /* Unknown register type. */
26972 gcc_unreachable ();
26974 /* If the stack increment doesn't match the size of the saved registers,
26975 something has gone horribly wrong. */
26976 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
26980 /* The remaining insns will describe the stores. */
26981 for (i
= 1; i
<= nregs
; i
++)
26983 /* Expect (set (mem <addr>) (reg)).
26984 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26985 e
= XVECEXP (p
, 0, i
);
26986 gcc_assert (GET_CODE (e
) == SET
26987 && MEM_P (SET_DEST (e
))
26988 && REG_P (SET_SRC (e
)));
26990 reg
= REGNO (SET_SRC (e
));
26991 gcc_assert (reg
>= lastreg
);
26994 fprintf (asm_out_file
, ", ");
26995 /* We can't use %r for vfp because we need to use the
26996 double precision register names. */
26997 if (IS_VFP_REGNUM (reg
))
26998 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27000 asm_fprintf (asm_out_file
, "%r", reg
);
27004 /* Check that the addresses are consecutive. */
27005 e
= XEXP (SET_DEST (e
), 0);
27006 if (GET_CODE (e
) == PLUS
)
27007 gcc_assert (REG_P (XEXP (e
, 0))
27008 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27009 && CONST_INT_P (XEXP (e
, 1))
27010 && offset
== INTVAL (XEXP (e
, 1)));
27014 && REGNO (e
) == SP_REGNUM
);
27015 offset
+= reg_size
;
27018 fprintf (asm_out_file
, "}\n");
27020 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
27023 /* Emit unwind directives for a SET. */
27026 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27034 switch (GET_CODE (e0
))
27037 /* Pushing a single register. */
27038 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27039 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27040 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27043 asm_fprintf (asm_out_file
, "\t.save ");
27044 if (IS_VFP_REGNUM (REGNO (e1
)))
27045 asm_fprintf(asm_out_file
, "{d%d}\n",
27046 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27048 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27052 if (REGNO (e0
) == SP_REGNUM
)
27054 /* A stack increment. */
27055 if (GET_CODE (e1
) != PLUS
27056 || !REG_P (XEXP (e1
, 0))
27057 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27058 || !CONST_INT_P (XEXP (e1
, 1)))
27061 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27062 -INTVAL (XEXP (e1
, 1)));
27064 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27066 HOST_WIDE_INT offset
;
27068 if (GET_CODE (e1
) == PLUS
)
27070 if (!REG_P (XEXP (e1
, 0))
27071 || !CONST_INT_P (XEXP (e1
, 1)))
27073 reg
= REGNO (XEXP (e1
, 0));
27074 offset
= INTVAL (XEXP (e1
, 1));
27075 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27076 HARD_FRAME_POINTER_REGNUM
, reg
,
27079 else if (REG_P (e1
))
27082 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27083 HARD_FRAME_POINTER_REGNUM
, reg
);
27088 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27090 /* Move from sp to reg. */
27091 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27093 else if (GET_CODE (e1
) == PLUS
27094 && REG_P (XEXP (e1
, 0))
27095 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27096 && CONST_INT_P (XEXP (e1
, 1)))
27098 /* Set reg to offset from sp. */
27099 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27100 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27112 /* Emit unwind directives for the given insn. */
27115 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27118 bool handled_one
= false;
27120 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27123 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27124 && (TREE_NOTHROW (current_function_decl
)
27125 || crtl
->all_throwers_are_sibcalls
))
27128 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27131 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27133 switch (REG_NOTE_KIND (note
))
27135 case REG_FRAME_RELATED_EXPR
:
27136 pat
= XEXP (note
, 0);
27139 case REG_CFA_REGISTER
:
27140 pat
= XEXP (note
, 0);
27143 pat
= PATTERN (insn
);
27144 if (GET_CODE (pat
) == PARALLEL
)
27145 pat
= XVECEXP (pat
, 0, 0);
27148 /* Only emitted for IS_STACKALIGN re-alignment. */
27153 src
= SET_SRC (pat
);
27154 dest
= SET_DEST (pat
);
27156 gcc_assert (src
== stack_pointer_rtx
);
27157 reg
= REGNO (dest
);
27158 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27161 handled_one
= true;
27164 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27165 to get correct dwarf information for shrink-wrap. We should not
27166 emit unwind information for it because these are used either for
27167 pretend arguments or notes to adjust sp and restore registers from
27169 case REG_CFA_DEF_CFA
:
27170 case REG_CFA_ADJUST_CFA
:
27171 case REG_CFA_RESTORE
:
27174 case REG_CFA_EXPRESSION
:
27175 case REG_CFA_OFFSET
:
27176 /* ??? Only handling here what we actually emit. */
27177 gcc_unreachable ();
27185 pat
= PATTERN (insn
);
27188 switch (GET_CODE (pat
))
27191 arm_unwind_emit_set (asm_out_file
, pat
);
27195 /* Store multiple. */
27196 arm_unwind_emit_sequence (asm_out_file
, pat
);
27205 /* Output a reference from a function exception table to the type_info
27206 object X. The EABI specifies that the symbol should be relocated by
27207 an R_ARM_TARGET2 relocation. */
27210 arm_output_ttype (rtx x
)
27212 fputs ("\t.word\t", asm_out_file
);
27213 output_addr_const (asm_out_file
, x
);
27214 /* Use special relocations for symbol references. */
27215 if (!CONST_INT_P (x
))
27216 fputs ("(TARGET2)", asm_out_file
);
27217 fputc ('\n', asm_out_file
);
27222 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27225 arm_asm_emit_except_personality (rtx personality
)
27227 fputs ("\t.personality\t", asm_out_file
);
27228 output_addr_const (asm_out_file
, personality
);
27229 fputc ('\n', asm_out_file
);
27232 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27235 arm_asm_init_sections (void)
27237 exception_section
= get_unnamed_section (0, output_section_asm_op
,
27240 #endif /* ARM_UNWIND_INFO */
27242 /* Output unwind directives for the start/end of a function. */
27245 arm_output_fn_unwind (FILE * f
, bool prologue
)
27247 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27251 fputs ("\t.fnstart\n", f
);
27254 /* If this function will never be unwound, then mark it as such.
27255 The came condition is used in arm_unwind_emit to suppress
27256 the frame annotations. */
27257 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27258 && (TREE_NOTHROW (current_function_decl
)
27259 || crtl
->all_throwers_are_sibcalls
))
27260 fputs("\t.cantunwind\n", f
);
27262 fputs ("\t.fnend\n", f
);
27267 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27269 enum tls_reloc reloc
;
27272 val
= XVECEXP (x
, 0, 0);
27273 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27275 output_addr_const (fp
, val
);
27280 fputs ("(tlsgd)", fp
);
27283 fputs ("(tlsldm)", fp
);
27286 fputs ("(tlsldo)", fp
);
27289 fputs ("(gottpoff)", fp
);
27292 fputs ("(tpoff)", fp
);
27295 fputs ("(tlsdesc)", fp
);
27298 gcc_unreachable ();
27307 fputs (" + (. - ", fp
);
27308 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27309 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27310 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27311 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27321 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27324 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27326 gcc_assert (size
== 4);
27327 fputs ("\t.word\t", file
);
27328 output_addr_const (file
, x
);
27329 fputs ("(tlsldo)", file
);
27332 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27335 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27337 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27338 return arm_emit_tls_decoration (fp
, x
);
27339 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27342 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27344 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27345 assemble_name_raw (fp
, label
);
27349 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27351 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27355 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27359 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27361 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27365 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27369 else if (GET_CODE (x
) == CONST_VECTOR
)
27370 return arm_emit_vector_const (fp
, x
);
27375 /* Output assembly for a shift instruction.
27376 SET_FLAGS determines how the instruction modifies the condition codes.
27377 0 - Do not set condition codes.
27378 1 - Set condition codes.
27379 2 - Use smallest instruction. */
27381 arm_output_shift(rtx
* operands
, int set_flags
)
27384 static const char flag_chars
[3] = {'?', '.', '!'};
27389 c
= flag_chars
[set_flags
];
27390 shift
= shift_op(operands
[3], &val
);
27394 operands
[2] = GEN_INT(val
);
27395 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27398 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27400 output_asm_insn (pattern
, operands
);
27404 /* Output assembly for a WMMX immediate shift instruction. */
27406 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27408 int shift
= INTVAL (operands
[2]);
27410 machine_mode opmode
= GET_MODE (operands
[0]);
27412 gcc_assert (shift
>= 0);
27414 /* If the shift value in the register versions is > 63 (for D qualifier),
27415 31 (for W qualifier) or 15 (for H qualifier). */
27416 if (((opmode
== V4HImode
) && (shift
> 15))
27417 || ((opmode
== V2SImode
) && (shift
> 31))
27418 || ((opmode
== DImode
) && (shift
> 63)))
27422 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27423 output_asm_insn (templ
, operands
);
27424 if (opmode
== DImode
)
27426 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27427 output_asm_insn (templ
, operands
);
27432 /* The destination register will contain all zeros. */
27433 sprintf (templ
, "wzero\t%%0");
27434 output_asm_insn (templ
, operands
);
27439 if ((opmode
== DImode
) && (shift
> 32))
27441 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27442 output_asm_insn (templ
, operands
);
27443 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27444 output_asm_insn (templ
, operands
);
27448 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27449 output_asm_insn (templ
, operands
);
27454 /* Output assembly for a WMMX tinsr instruction. */
27456 arm_output_iwmmxt_tinsr (rtx
*operands
)
27458 int mask
= INTVAL (operands
[3]);
27461 int units
= mode_nunits
[GET_MODE (operands
[0])];
27462 gcc_assert ((mask
& (mask
- 1)) == 0);
27463 for (i
= 0; i
< units
; ++i
)
27465 if ((mask
& 0x01) == 1)
27471 gcc_assert (i
< units
);
27473 switch (GET_MODE (operands
[0]))
27476 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27479 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27482 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27485 gcc_unreachable ();
27488 output_asm_insn (templ
, operands
);
27493 /* Output a Thumb-1 casesi dispatch sequence. */
27495 thumb1_output_casesi (rtx
*operands
)
27497 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27499 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27501 switch (GET_MODE(diff_vec
))
27504 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27505 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27507 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27508 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27510 return "bl\t%___gnu_thumb1_case_si";
27512 gcc_unreachable ();
27516 /* Output a Thumb-2 casesi instruction. */
27518 thumb2_output_casesi (rtx
*operands
)
27520 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27522 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27524 output_asm_insn ("cmp\t%0, %1", operands
);
27525 output_asm_insn ("bhi\t%l3", operands
);
27526 switch (GET_MODE(diff_vec
))
27529 return "tbb\t[%|pc, %0]";
27531 return "tbh\t[%|pc, %0, lsl #1]";
27535 output_asm_insn ("adr\t%4, %l2", operands
);
27536 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27537 output_asm_insn ("add\t%4, %4, %5", operands
);
27542 output_asm_insn ("adr\t%4, %l2", operands
);
27543 return "ldr\t%|pc, [%4, %0, lsl #2]";
27546 gcc_unreachable ();
27550 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27551 per-core tuning structs. */
27553 arm_issue_rate (void)
27555 return current_tune
->issue_rate
;
27558 /* Return how many instructions should scheduler lookahead to choose the
27561 arm_first_cycle_multipass_dfa_lookahead (void)
27563 int issue_rate
= arm_issue_rate ();
27565 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27568 /* Enable modeling of L2 auto-prefetcher. */
27570 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27572 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27576 arm_mangle_type (const_tree type
)
27578 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27579 has to be managled as if it is in the "std" namespace. */
27580 if (TARGET_AAPCS_BASED
27581 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27582 return "St9__va_list";
27584 /* Half-precision float. */
27585 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27588 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27590 if (TYPE_NAME (type
) != NULL
)
27591 return arm_mangle_builtin_type (type
);
27593 /* Use the default mangling. */
27597 /* Order of allocation of core registers for Thumb: this allocation is
27598 written over the corresponding initial entries of the array
27599 initialized with REG_ALLOC_ORDER. We allocate all low registers
27600 first. Saving and restoring a low register is usually cheaper than
27601 using a call-clobbered high register. */
27603 static const int thumb_core_reg_alloc_order
[] =
27605 3, 2, 1, 0, 4, 5, 6, 7,
27606 14, 12, 8, 9, 10, 11
27609 /* Adjust register allocation order when compiling for Thumb. */
27612 arm_order_regs_for_local_alloc (void)
27614 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27615 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27617 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27618 sizeof (thumb_core_reg_alloc_order
));
27621 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27624 arm_frame_pointer_required (void)
27626 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27629 /* If the function receives nonlocal gotos, it needs to save the frame
27630 pointer in the nonlocal_goto_save_area object. */
27631 if (cfun
->has_nonlocal_label
)
27634 /* The frame pointer is required for non-leaf APCS frames. */
27635 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !leaf_function_p ())
27638 /* If we are probing the stack in the prologue, we will have a faulting
27639 instruction prior to the stack adjustment and this requires a frame
27640 pointer if we want to catch the exception using the EABI unwinder. */
27641 if (!IS_INTERRUPT (arm_current_func_type ())
27642 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27643 && arm_except_unwind_info (&global_options
) == UI_TARGET
27644 && cfun
->can_throw_non_call_exceptions
)
27646 HOST_WIDE_INT size
= get_frame_size ();
27648 /* That's irrelevant if there is no stack adjustment. */
27652 /* That's relevant only if there is a stack probe. */
27653 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27655 /* We don't have the final size of the frame so adjust. */
27656 size
+= 32 * UNITS_PER_WORD
;
27657 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
27667 /* Only thumb1 can't support conditional execution, so return true if
27668 the target is not thumb1. */
27670 arm_have_conditional_execution (void)
27672 return !TARGET_THUMB1
;
27675 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27676 static HOST_WIDE_INT
27677 arm_vector_alignment (const_tree type
)
27679 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27681 if (TARGET_AAPCS_BASED
)
27682 align
= MIN (align
, 64);
27687 static unsigned int
27688 arm_autovectorize_vector_sizes (void)
27690 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27694 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27696 /* Vectors which aren't in packed structures will not be less aligned than
27697 the natural alignment of their element type, so this is safe. */
27698 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27701 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27705 arm_builtin_support_vector_misalignment (machine_mode mode
,
27706 const_tree type
, int misalignment
,
27709 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27711 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27716 /* If the misalignment is unknown, we should be able to handle the access
27717 so long as it is not to a member of a packed data structure. */
27718 if (misalignment
== -1)
27721 /* Return true if the misalignment is a multiple of the natural alignment
27722 of the vector's element type. This is probably always going to be
27723 true in practice, since we've already established that this isn't a
27725 return ((misalignment
% align
) == 0);
27728 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27733 arm_conditional_register_usage (void)
27737 if (TARGET_THUMB1
&& optimize_size
)
27739 /* When optimizing for size on Thumb-1, it's better not
27740 to use the HI regs, because of the overhead of
27742 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27743 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27746 /* The link register can be clobbered by any branch insn,
27747 but we have no way to track that at present, so mark
27748 it as unavailable. */
27750 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27752 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
27754 /* VFPv3 registers are disabled when earlier VFP
27755 versions are selected due to the definition of
27756 LAST_VFP_REGNUM. */
27757 for (regno
= FIRST_VFP_REGNUM
;
27758 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27760 fixed_regs
[regno
] = 0;
27761 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27762 || regno
>= FIRST_VFP_REGNUM
+ 32;
27766 if (TARGET_REALLY_IWMMXT
)
27768 regno
= FIRST_IWMMXT_GR_REGNUM
;
27769 /* The 2002/10/09 revision of the XScale ABI has wCG0
27770 and wCG1 as call-preserved registers. The 2002/11/21
27771 revision changed this so that all wCG registers are
27772 scratch registers. */
27773 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27774 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27775 fixed_regs
[regno
] = 0;
27776 /* The XScale ABI has wR0 - wR9 as scratch registers,
27777 the rest as call-preserved registers. */
27778 for (regno
= FIRST_IWMMXT_REGNUM
;
27779 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27781 fixed_regs
[regno
] = 0;
27782 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27786 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27788 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27789 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27791 else if (TARGET_APCS_STACK
)
27793 fixed_regs
[10] = 1;
27794 call_used_regs
[10] = 1;
27796 /* -mcaller-super-interworking reserves r11 for calls to
27797 _interwork_r11_call_via_rN(). Making the register global
27798 is an easy way of ensuring that it remains valid for all
27800 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27801 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27803 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27804 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27805 if (TARGET_CALLER_INTERWORKING
)
27806 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27808 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27812 arm_preferred_rename_class (reg_class_t rclass
)
27814 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27815 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27816 and code size can be reduced. */
27817 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27823 /* Compute the attribute "length" of insn "*push_multi".
27824 So this function MUST be kept in sync with that insn pattern. */
27826 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
27828 int i
, regno
, hi_reg
;
27829 int num_saves
= XVECLEN (parallel_op
, 0);
27839 regno
= REGNO (first_op
);
27840 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
27841 list is 8-bit. Normally this means all registers in the list must be
27842 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
27843 encodings. There is one exception for PUSH that LR in HI_REGS can be used
27844 with 16-bit encoding. */
27845 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27846 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
27848 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
27849 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27857 /* Compute the attribute "length" of insn. Currently, this function is used
27858 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27859 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27860 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
27861 true if OPERANDS contains insn which explicit updates base register. */
27864 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
27873 rtx parallel_op
= operands
[0];
27874 /* Initialize to elements number of PARALLEL. */
27875 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
27876 /* Initialize the value to base register. */
27877 unsigned regno
= REGNO (operands
[1]);
27878 /* Skip return and write back pattern.
27879 We only need register pop pattern for later analysis. */
27880 unsigned first_indx
= 0;
27881 first_indx
+= return_pc
? 1 : 0;
27882 first_indx
+= write_back_p
? 1 : 0;
27884 /* A pop operation can be done through LDM or POP. If the base register is SP
27885 and if it's with write back, then a LDM will be alias of POP. */
27886 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
27887 bool ldm_p
= !pop_p
;
27889 /* Check base register for LDM. */
27890 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
27893 /* Check each register in the list. */
27894 for (; indx
>= first_indx
; indx
--)
27896 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
27897 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27898 comment in arm_attr_length_push_multi. */
27899 if (REGNO_REG_CLASS (regno
) == HI_REGS
27900 && (regno
!= PC_REGNUM
|| ldm_p
))
27907 /* Compute the number of instructions emitted by output_move_double. */
27909 arm_count_output_move_double_insns (rtx
*operands
)
27913 /* output_move_double may modify the operands array, so call it
27914 here on a copy of the array. */
27915 ops
[0] = operands
[0];
27916 ops
[1] = operands
[1];
27917 output_move_double (ops
, false, &count
);
27922 vfp3_const_double_for_fract_bits (rtx operand
)
27924 REAL_VALUE_TYPE r0
;
27926 if (!CONST_DOUBLE_P (operand
))
27929 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
27930 if (exact_real_inverse (DFmode
, &r0
)
27931 && !REAL_VALUE_NEGATIVE (r0
))
27933 if (exact_real_truncate (DFmode
, &r0
))
27935 HOST_WIDE_INT value
= real_to_integer (&r0
);
27936 value
= value
& 0xffffffff;
27937 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
27939 int ret
= exact_log2 (value
);
27940 gcc_assert (IN_RANGE (ret
, 0, 31));
27948 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27949 log2 is in [1, 32], return that log2. Otherwise return -1.
27950 This is used in the patterns for vcvt.s32.f32 floating-point to
27951 fixed-point conversions. */
27954 vfp3_const_double_for_bits (rtx x
)
27956 const REAL_VALUE_TYPE
*r
;
27958 if (!CONST_DOUBLE_P (x
))
27961 r
= CONST_DOUBLE_REAL_VALUE (x
);
27963 if (REAL_VALUE_NEGATIVE (*r
)
27964 || REAL_VALUE_ISNAN (*r
)
27965 || REAL_VALUE_ISINF (*r
)
27966 || !real_isinteger (r
, SFmode
))
27969 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
27971 /* The exact_log2 above will have returned -1 if this is
27972 not an exact log2. */
27973 if (!IN_RANGE (hwint
, 1, 32))
27980 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27983 arm_pre_atomic_barrier (enum memmodel model
)
27985 if (need_atomic_barrier_p (model
, true))
27986 emit_insn (gen_memory_barrier ());
27990 arm_post_atomic_barrier (enum memmodel model
)
27992 if (need_atomic_barrier_p (model
, false))
27993 emit_insn (gen_memory_barrier ());
27996 /* Emit the load-exclusive and store-exclusive instructions.
27997 Use acquire and release versions if necessary. */
28000 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28002 rtx (*gen
) (rtx
, rtx
);
28008 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28009 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28010 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28011 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28013 gcc_unreachable ();
28020 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28021 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
28022 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
28023 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
28025 gcc_unreachable ();
28029 emit_insn (gen (rval
, mem
));
28033 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28036 rtx (*gen
) (rtx
, rtx
, rtx
);
28042 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28043 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28044 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28045 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28047 gcc_unreachable ();
28054 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28055 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
28056 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
28057 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
28059 gcc_unreachable ();
28063 emit_insn (gen (bval
, rval
, mem
));
28066 /* Mark the previous jump instruction as unlikely. */
28069 emit_unlikely_jump (rtx insn
)
28071 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
28073 insn
= emit_jump_insn (insn
);
28074 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
28077 /* Expand a compare and swap pattern. */
28080 arm_expand_compare_and_swap (rtx operands
[])
28082 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28084 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28086 bval
= operands
[0];
28087 rval
= operands
[1];
28089 oldval
= operands
[3];
28090 newval
= operands
[4];
28091 is_weak
= operands
[5];
28092 mod_s
= operands
[6];
28093 mod_f
= operands
[7];
28094 mode
= GET_MODE (mem
);
28096 /* Normally the succ memory model must be stronger than fail, but in the
28097 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28098 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28100 if (TARGET_HAVE_LDACQ
28101 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28102 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28103 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28109 /* For narrow modes, we're going to perform the comparison in SImode,
28110 so do the zero-extension now. */
28111 rval
= gen_reg_rtx (SImode
);
28112 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28116 /* Force the value into a register if needed. We waited until after
28117 the zero-extension above to do this properly. */
28118 if (!arm_add_operand (oldval
, SImode
))
28119 oldval
= force_reg (SImode
, oldval
);
28123 if (!cmpdi_operand (oldval
, mode
))
28124 oldval
= force_reg (mode
, oldval
);
28128 gcc_unreachable ();
28133 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
28134 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
28135 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
28136 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
28138 gcc_unreachable ();
28141 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28143 if (mode
== QImode
|| mode
== HImode
)
28144 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28146 /* In all cases, we arrange for success to be signaled by Z set.
28147 This arrangement allows for the boolean result to be used directly
28148 in a subsequent branch, post optimization. */
28149 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28150 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
28151 emit_insn (gen_rtx_SET (bval
, x
));
28154 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28155 another memory store between the load-exclusive and store-exclusive can
28156 reset the monitor from Exclusive to Open state. This means we must wait
28157 until after reload to split the pattern, lest we get a register spill in
28158 the middle of the atomic sequence. */
28161 arm_split_compare_and_swap (rtx operands
[])
28163 rtx rval
, mem
, oldval
, newval
, scratch
;
28165 enum memmodel mod_s
, mod_f
;
28167 rtx_code_label
*label1
, *label2
;
28170 rval
= operands
[0];
28172 oldval
= operands
[2];
28173 newval
= operands
[3];
28174 is_weak
= (operands
[4] != const0_rtx
);
28175 mod_s
= memmodel_from_int (INTVAL (operands
[5]));
28176 mod_f
= memmodel_from_int (INTVAL (operands
[6]));
28177 scratch
= operands
[7];
28178 mode
= GET_MODE (mem
);
28180 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28182 bool use_acquire
= TARGET_HAVE_LDACQ
28183 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28184 || is_mm_release (mod_s
));
28186 bool use_release
= TARGET_HAVE_LDACQ
28187 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28188 || is_mm_acquire (mod_s
));
28190 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28191 a full barrier is emitted after the store-release. */
28193 use_acquire
= false;
28195 /* Checks whether a barrier is needed and emits one accordingly. */
28196 if (!(use_acquire
|| use_release
))
28197 arm_pre_atomic_barrier (mod_s
);
28202 label1
= gen_label_rtx ();
28203 emit_label (label1
);
28205 label2
= gen_label_rtx ();
28207 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28209 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
28210 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28211 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28212 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28213 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28215 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
28217 /* Weak or strong, we want EQ to be true for success, so that we
28218 match the flags that we got from the compare above. */
28219 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28220 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
28221 emit_insn (gen_rtx_SET (cond
, x
));
28225 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28226 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28227 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
28228 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28231 if (!is_mm_relaxed (mod_f
))
28232 emit_label (label2
);
28234 /* Checks whether a barrier is needed and emits one accordingly. */
28236 || !(use_acquire
|| use_release
))
28237 arm_post_atomic_barrier (mod_s
);
28239 if (is_mm_relaxed (mod_f
))
28240 emit_label (label2
);
28244 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28245 rtx value
, rtx model_rtx
, rtx cond
)
28247 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28248 machine_mode mode
= GET_MODE (mem
);
28249 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28250 rtx_code_label
*label
;
28253 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28255 bool use_acquire
= TARGET_HAVE_LDACQ
28256 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28257 || is_mm_release (model
));
28259 bool use_release
= TARGET_HAVE_LDACQ
28260 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28261 || is_mm_acquire (model
));
28263 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28264 a full barrier is emitted after the store-release. */
28266 use_acquire
= false;
28268 /* Checks whether a barrier is needed and emits one accordingly. */
28269 if (!(use_acquire
|| use_release
))
28270 arm_pre_atomic_barrier (model
);
28272 label
= gen_label_rtx ();
28273 emit_label (label
);
28276 new_out
= gen_lowpart (wmode
, new_out
);
28278 old_out
= gen_lowpart (wmode
, old_out
);
28281 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28283 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28292 x
= gen_rtx_AND (wmode
, old_out
, value
);
28293 emit_insn (gen_rtx_SET (new_out
, x
));
28294 x
= gen_rtx_NOT (wmode
, new_out
);
28295 emit_insn (gen_rtx_SET (new_out
, x
));
28299 if (CONST_INT_P (value
))
28301 value
= GEN_INT (-INTVAL (value
));
28307 if (mode
== DImode
)
28309 /* DImode plus/minus need to clobber flags. */
28310 /* The adddi3 and subdi3 patterns are incorrectly written so that
28311 they require matching operands, even when we could easily support
28312 three operands. Thankfully, this can be fixed up post-splitting,
28313 as the individual add+adc patterns do accept three operands and
28314 post-reload cprop can make these moves go away. */
28315 emit_move_insn (new_out
, old_out
);
28317 x
= gen_adddi3 (new_out
, new_out
, value
);
28319 x
= gen_subdi3 (new_out
, new_out
, value
);
28326 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28327 emit_insn (gen_rtx_SET (new_out
, x
));
28331 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28334 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28335 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28337 /* Checks whether a barrier is needed and emits one accordingly. */
28339 || !(use_acquire
|| use_release
))
28340 arm_post_atomic_barrier (model
);
28343 #define MAX_VECT_LEN 16
28345 struct expand_vec_perm_d
28347 rtx target
, op0
, op1
;
28348 unsigned char perm
[MAX_VECT_LEN
];
28349 machine_mode vmode
;
28350 unsigned char nelt
;
28355 /* Generate a variable permutation. */
28358 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28360 machine_mode vmode
= GET_MODE (target
);
28361 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28363 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28364 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28365 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28366 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28367 gcc_checking_assert (TARGET_NEON
);
28371 if (vmode
== V8QImode
)
28372 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28374 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28380 if (vmode
== V8QImode
)
28382 pair
= gen_reg_rtx (V16QImode
);
28383 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28384 pair
= gen_lowpart (TImode
, pair
);
28385 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28389 pair
= gen_reg_rtx (OImode
);
28390 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28391 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28397 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28399 machine_mode vmode
= GET_MODE (target
);
28400 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
28401 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28402 rtx rmask
[MAX_VECT_LEN
], mask
;
28404 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28405 numbering of elements for big-endian, we must reverse the order. */
28406 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28408 /* The VTBL instruction does not use a modulo index, so we must take care
28409 of that ourselves. */
28410 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28411 for (i
= 0; i
< nelt
; ++i
)
28413 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
28414 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28416 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28419 /* Map lane ordering between architectural lane order, and GCC lane order,
28420 taking into account ABI. See comment above output_move_neon for details. */
28423 neon_endian_lane_map (machine_mode mode
, int lane
)
28425 if (BYTES_BIG_ENDIAN
)
28427 int nelems
= GET_MODE_NUNITS (mode
);
28428 /* Reverse lane order. */
28429 lane
= (nelems
- 1 - lane
);
28430 /* Reverse D register order, to match ABI. */
28431 if (GET_MODE_SIZE (mode
) == 16)
28432 lane
= lane
^ (nelems
/ 2);
28437 /* Some permutations index into pairs of vectors, this is a helper function
28438 to map indexes into those pairs of vectors. */
28441 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
28443 int nelem
= GET_MODE_NUNITS (mode
);
28444 if (BYTES_BIG_ENDIAN
)
28446 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
28450 /* Generate or test for an insn that supports a constant permutation. */
28452 /* Recognize patterns for the VUZP insns. */
28455 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28457 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28458 rtx out0
, out1
, in0
, in1
;
28459 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28463 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28466 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28467 big endian pattern on 64 bit vectors, so we correct for that. */
28468 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28469 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
28471 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28473 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28475 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28479 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28481 for (i
= 0; i
< nelt
; i
++)
28484 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28485 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
28495 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28496 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28497 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28498 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28499 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28500 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28501 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28502 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28504 gcc_unreachable ();
28509 if (swap_nelt
!= 0)
28510 std::swap (in0
, in1
);
28513 out1
= gen_reg_rtx (d
->vmode
);
28515 std::swap (out0
, out1
);
28517 emit_insn (gen (out0
, in0
, in1
, out1
));
28521 /* Recognize patterns for the VZIP insns. */
28524 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28526 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
28527 rtx out0
, out1
, in0
, in1
;
28528 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28532 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28535 is_swapped
= BYTES_BIG_ENDIAN
;
28537 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
28540 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
28542 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28546 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28548 for (i
= 0; i
< nelt
/ 2; i
++)
28551 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
28552 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
28556 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
28557 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
28568 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28569 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28570 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28571 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28572 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28573 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28574 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28575 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28577 gcc_unreachable ();
28583 std::swap (in0
, in1
);
28586 out1
= gen_reg_rtx (d
->vmode
);
28588 std::swap (out0
, out1
);
28590 emit_insn (gen (out0
, in0
, in1
, out1
));
28594 /* Recognize patterns for the VREV insns. */
28597 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
28599 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
28600 rtx (*gen
)(rtx
, rtx
);
28602 if (!d
->one_vector_p
)
28611 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28612 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28620 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28621 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28622 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28623 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28631 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
28632 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
28633 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
28634 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
28635 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
28636 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
28637 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
28638 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
28647 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
28648 for (j
= 0; j
<= diff
; j
+= 1)
28650 /* This is guaranteed to be true as the value of diff
28651 is 7, 3, 1 and we should have enough elements in the
28652 queue to generate this. Getting a vector mask with a
28653 value of diff other than these values implies that
28654 something is wrong by the time we get here. */
28655 gcc_assert (i
+ j
< nelt
);
28656 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
28664 emit_insn (gen (d
->target
, d
->op0
));
28668 /* Recognize patterns for the VTRN insns. */
28671 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
28673 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28674 rtx out0
, out1
, in0
, in1
;
28675 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28677 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28680 /* Note that these are little-endian tests. Adjust for big-endian later. */
28681 if (d
->perm
[0] == 0)
28683 else if (d
->perm
[0] == 1)
28687 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28689 for (i
= 0; i
< nelt
; i
+= 2)
28691 if (d
->perm
[i
] != i
+ odd
)
28693 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28703 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28704 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28705 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28706 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28707 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
28708 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
28709 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
28710 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
28712 gcc_unreachable ();
28717 if (BYTES_BIG_ENDIAN
)
28719 std::swap (in0
, in1
);
28724 out1
= gen_reg_rtx (d
->vmode
);
28726 std::swap (out0
, out1
);
28728 emit_insn (gen (out0
, in0
, in1
, out1
));
28732 /* Recognize patterns for the VEXT insns. */
28735 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
28737 unsigned int i
, nelt
= d
->nelt
;
28738 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
28741 unsigned int location
;
28743 unsigned int next
= d
->perm
[0] + 1;
28745 /* TODO: Handle GCC's numbering of elements for big-endian. */
28746 if (BYTES_BIG_ENDIAN
)
28749 /* Check if the extracted indexes are increasing by one. */
28750 for (i
= 1; i
< nelt
; next
++, i
++)
28752 /* If we hit the most significant element of the 2nd vector in
28753 the previous iteration, no need to test further. */
28754 if (next
== 2 * nelt
)
28757 /* If we are operating on only one vector: it could be a
28758 rotation. If there are only two elements of size < 64, let
28759 arm_evpc_neon_vrev catch it. */
28760 if (d
->one_vector_p
&& (next
== nelt
))
28762 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
28768 if (d
->perm
[i
] != next
)
28772 location
= d
->perm
[0];
28776 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
28777 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
28778 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
28779 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
28780 case V2SImode
: gen
= gen_neon_vextv2si
; break;
28781 case V4SImode
: gen
= gen_neon_vextv4si
; break;
28782 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
28783 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
28784 case V2DImode
: gen
= gen_neon_vextv2di
; break;
28793 offset
= GEN_INT (location
);
28794 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
28798 /* The NEON VTBL instruction is a fully variable permuation that's even
28799 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28800 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28801 can do slightly better by expanding this as a constant where we don't
28802 have to apply a mask. */
28805 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
28807 rtx rperm
[MAX_VECT_LEN
], sel
;
28808 machine_mode vmode
= d
->vmode
;
28809 unsigned int i
, nelt
= d
->nelt
;
28811 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28812 numbering of elements for big-endian, we must reverse the order. */
28813 if (BYTES_BIG_ENDIAN
)
28819 /* Generic code will try constant permutation twice. Once with the
28820 original mode and again with the elements lowered to QImode.
28821 So wait and don't do the selector expansion ourselves. */
28822 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
28825 for (i
= 0; i
< nelt
; ++i
)
28826 rperm
[i
] = GEN_INT (d
->perm
[i
]);
28827 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
28828 sel
= force_reg (vmode
, sel
);
28830 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
28835 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
28837 /* Check if the input mask matches vext before reordering the
28840 if (arm_evpc_neon_vext (d
))
28843 /* The pattern matching functions above are written to look for a small
28844 number to begin the sequence (0, 1, N/2). If we begin with an index
28845 from the second operand, we can swap the operands. */
28846 if (d
->perm
[0] >= d
->nelt
)
28848 unsigned i
, nelt
= d
->nelt
;
28850 for (i
= 0; i
< nelt
; ++i
)
28851 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
28853 std::swap (d
->op0
, d
->op1
);
28858 if (arm_evpc_neon_vuzp (d
))
28860 if (arm_evpc_neon_vzip (d
))
28862 if (arm_evpc_neon_vrev (d
))
28864 if (arm_evpc_neon_vtrn (d
))
28866 return arm_evpc_neon_vtbl (d
);
28871 /* Expand a vec_perm_const pattern. */
28874 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28876 struct expand_vec_perm_d d
;
28877 int i
, nelt
, which
;
28883 d
.vmode
= GET_MODE (target
);
28884 gcc_assert (VECTOR_MODE_P (d
.vmode
));
28885 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
28886 d
.testing_p
= false;
28888 for (i
= which
= 0; i
< nelt
; ++i
)
28890 rtx e
= XVECEXP (sel
, 0, i
);
28891 int ei
= INTVAL (e
) & (2 * nelt
- 1);
28892 which
|= (ei
< nelt
? 1 : 2);
28902 d
.one_vector_p
= false;
28903 if (!rtx_equal_p (op0
, op1
))
28906 /* The elements of PERM do not suggest that only the first operand
28907 is used, but both operands are identical. Allow easier matching
28908 of the permutation by folding the permutation into the single
28912 for (i
= 0; i
< nelt
; ++i
)
28913 d
.perm
[i
] &= nelt
- 1;
28915 d
.one_vector_p
= true;
28920 d
.one_vector_p
= true;
28924 return arm_expand_vec_perm_const_1 (&d
);
28927 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28930 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
28931 const unsigned char *sel
)
28933 struct expand_vec_perm_d d
;
28934 unsigned int i
, nelt
, which
;
28938 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
28939 d
.testing_p
= true;
28940 memcpy (d
.perm
, sel
, nelt
);
28942 /* Categorize the set of elements in the selector. */
28943 for (i
= which
= 0; i
< nelt
; ++i
)
28945 unsigned char e
= d
.perm
[i
];
28946 gcc_assert (e
< 2 * nelt
);
28947 which
|= (e
< nelt
? 1 : 2);
28950 /* For all elements from second vector, fold the elements to first. */
28952 for (i
= 0; i
< nelt
; ++i
)
28955 /* Check whether the mask can be applied to the vector type. */
28956 d
.one_vector_p
= (which
!= 3);
28958 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
28959 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
28960 if (!d
.one_vector_p
)
28961 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
28964 ret
= arm_expand_vec_perm_const_1 (&d
);
28971 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
28973 /* If we are soft float and we do not have ldrd
28974 then all auto increment forms are ok. */
28975 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
28980 /* Post increment and Pre Decrement are supported for all
28981 instruction forms except for vector forms. */
28984 if (VECTOR_MODE_P (mode
))
28986 if (code
!= ARM_PRE_DEC
)
28996 /* Without LDRD and mode size greater than
28997 word size, there is no point in auto-incrementing
28998 because ldm and stm will not have these forms. */
28999 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29002 /* Vector and floating point modes do not support
29003 these auto increment forms. */
29004 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29017 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29018 on ARM, since we know that shifts by negative amounts are no-ops.
29019 Additionally, the default expansion code is not available or suitable
29020 for post-reload insn splits (this can occur when the register allocator
29021 chooses not to do a shift in NEON).
29023 This function is used in both initial expand and post-reload splits, and
29024 handles all kinds of 64-bit shifts.
29026 Input requirements:
29027 - It is safe for the input and output to be the same register, but
29028 early-clobber rules apply for the shift amount and scratch registers.
29029 - Shift by register requires both scratch registers. In all other cases
29030 the scratch registers may be NULL.
29031 - Ashiftrt by a register also clobbers the CC register. */
29033 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29034 rtx amount
, rtx scratch1
, rtx scratch2
)
29036 rtx out_high
= gen_highpart (SImode
, out
);
29037 rtx out_low
= gen_lowpart (SImode
, out
);
29038 rtx in_high
= gen_highpart (SImode
, in
);
29039 rtx in_low
= gen_lowpart (SImode
, in
);
29042 in = the register pair containing the input value.
29043 out = the destination register pair.
29044 up = the high- or low-part of each pair.
29045 down = the opposite part to "up".
29046 In a shift, we can consider bits to shift from "up"-stream to
29047 "down"-stream, so in a left-shift "up" is the low-part and "down"
29048 is the high-part of each register pair. */
29050 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29051 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29052 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29053 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29055 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29057 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29058 && GET_MODE (out
) == DImode
);
29060 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29061 && GET_MODE (in
) == DImode
);
29063 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29064 && GET_MODE (amount
) == SImode
)
29065 || CONST_INT_P (amount
)));
29066 gcc_assert (scratch1
== NULL
29067 || (GET_CODE (scratch1
) == SCRATCH
)
29068 || (GET_MODE (scratch1
) == SImode
29069 && REG_P (scratch1
)));
29070 gcc_assert (scratch2
== NULL
29071 || (GET_CODE (scratch2
) == SCRATCH
)
29072 || (GET_MODE (scratch2
) == SImode
29073 && REG_P (scratch2
)));
29074 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29075 || !HARD_REGISTER_P (out
)
29076 || (REGNO (out
) != REGNO (amount
)
29077 && REGNO (out
) + 1 != REGNO (amount
)));
29079 /* Macros to make following code more readable. */
29080 #define SUB_32(DEST,SRC) \
29081 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29082 #define RSB_32(DEST,SRC) \
29083 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29084 #define SUB_S_32(DEST,SRC) \
29085 gen_addsi3_compare0 ((DEST), (SRC), \
29087 #define SET(DEST,SRC) \
29088 gen_rtx_SET ((DEST), (SRC))
29089 #define SHIFT(CODE,SRC,AMOUNT) \
29090 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29091 #define LSHIFT(CODE,SRC,AMOUNT) \
29092 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29093 SImode, (SRC), (AMOUNT))
29094 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29095 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29096 SImode, (SRC), (AMOUNT))
29098 gen_rtx_IOR (SImode, (A), (B))
29099 #define BRANCH(COND,LABEL) \
29100 gen_arm_cond_branch ((LABEL), \
29101 gen_rtx_ ## COND (CCmode, cc_reg, \
29105 /* Shifts by register and shifts by constant are handled separately. */
29106 if (CONST_INT_P (amount
))
29108 /* We have a shift-by-constant. */
29110 /* First, handle out-of-range shift amounts.
29111 In both cases we try to match the result an ARM instruction in a
29112 shift-by-register would give. This helps reduce execution
29113 differences between optimization levels, but it won't stop other
29114 parts of the compiler doing different things. This is "undefined
29115 behavior, in any case. */
29116 if (INTVAL (amount
) <= 0)
29117 emit_insn (gen_movdi (out
, in
));
29118 else if (INTVAL (amount
) >= 64)
29120 if (code
== ASHIFTRT
)
29122 rtx const31_rtx
= GEN_INT (31);
29123 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29124 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29127 emit_insn (gen_movdi (out
, const0_rtx
));
29130 /* Now handle valid shifts. */
29131 else if (INTVAL (amount
) < 32)
29133 /* Shifts by a constant less than 32. */
29134 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29136 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29137 emit_insn (SET (out_down
,
29138 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29140 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29144 /* Shifts by a constant greater than 31. */
29145 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29147 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29148 if (code
== ASHIFTRT
)
29149 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29152 emit_insn (SET (out_up
, const0_rtx
));
29157 /* We have a shift-by-register. */
29158 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29160 /* This alternative requires the scratch registers. */
29161 gcc_assert (scratch1
&& REG_P (scratch1
));
29162 gcc_assert (scratch2
&& REG_P (scratch2
));
29164 /* We will need the values "amount-32" and "32-amount" later.
29165 Swapping them around now allows the later code to be more general. */
29169 emit_insn (SUB_32 (scratch1
, amount
));
29170 emit_insn (RSB_32 (scratch2
, amount
));
29173 emit_insn (RSB_32 (scratch1
, amount
));
29174 /* Also set CC = amount > 32. */
29175 emit_insn (SUB_S_32 (scratch2
, amount
));
29178 emit_insn (RSB_32 (scratch1
, amount
));
29179 emit_insn (SUB_32 (scratch2
, amount
));
29182 gcc_unreachable ();
29185 /* Emit code like this:
29188 out_down = in_down << amount;
29189 out_down = (in_up << (amount - 32)) | out_down;
29190 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29191 out_up = in_up << amount;
29194 out_down = in_down >> amount;
29195 out_down = (in_up << (32 - amount)) | out_down;
29197 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29198 out_up = in_up << amount;
29201 out_down = in_down >> amount;
29202 out_down = (in_up << (32 - amount)) | out_down;
29204 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29205 out_up = in_up << amount;
29207 The ARM and Thumb2 variants are the same but implemented slightly
29208 differently. If this were only called during expand we could just
29209 use the Thumb2 case and let combine do the right thing, but this
29210 can also be called from post-reload splitters. */
29212 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29214 if (!TARGET_THUMB2
)
29216 /* Emit code for ARM mode. */
29217 emit_insn (SET (out_down
,
29218 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29219 if (code
== ASHIFTRT
)
29221 rtx_code_label
*done_label
= gen_label_rtx ();
29222 emit_jump_insn (BRANCH (LT
, done_label
));
29223 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29225 emit_label (done_label
);
29228 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29233 /* Emit code for Thumb2 mode.
29234 Thumb2 can't do shift and or in one insn. */
29235 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29236 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29238 if (code
== ASHIFTRT
)
29240 rtx_code_label
*done_label
= gen_label_rtx ();
29241 emit_jump_insn (BRANCH (LT
, done_label
));
29242 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29243 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29244 emit_label (done_label
);
29248 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29249 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29253 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29267 /* Returns true if the pattern is a valid symbolic address, which is either a
29268 symbol_ref or (symbol_ref + addend).
29270 According to the ARM ELF ABI, the initial addend of REL-type relocations
29271 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29272 literal field of the instruction as a 16-bit signed value in the range
29273 -32768 <= A < 32768. */
29276 arm_valid_symbolic_address_p (rtx addr
)
29278 rtx xop0
, xop1
= NULL_RTX
;
29281 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29284 /* (const (plus: symbol_ref const_int)) */
29285 if (GET_CODE (addr
) == CONST
)
29286 tmp
= XEXP (addr
, 0);
29288 if (GET_CODE (tmp
) == PLUS
)
29290 xop0
= XEXP (tmp
, 0);
29291 xop1
= XEXP (tmp
, 1);
29293 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29294 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29300 /* Returns true if a valid comparison operation and makes
29301 the operands in a form that is valid. */
29303 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29305 enum rtx_code code
= GET_CODE (*comparison
);
29307 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29308 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29310 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29312 if (code
== UNEQ
|| code
== LTGT
)
29315 code_int
= (int)code
;
29316 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29317 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29322 if (!arm_add_operand (*op1
, mode
))
29323 *op1
= force_reg (mode
, *op1
);
29324 if (!arm_add_operand (*op2
, mode
))
29325 *op2
= force_reg (mode
, *op2
);
29329 if (!cmpdi_operand (*op1
, mode
))
29330 *op1
= force_reg (mode
, *op1
);
29331 if (!cmpdi_operand (*op2
, mode
))
29332 *op2
= force_reg (mode
, *op2
);
29337 if (!arm_float_compare_operand (*op1
, mode
))
29338 *op1
= force_reg (mode
, *op1
);
29339 if (!arm_float_compare_operand (*op2
, mode
))
29340 *op2
= force_reg (mode
, *op2
);
29350 /* Maximum number of instructions to set block of memory. */
29352 arm_block_set_max_insns (void)
29354 if (optimize_function_for_size_p (cfun
))
29357 return current_tune
->max_insns_inline_memset
;
29360 /* Return TRUE if it's profitable to set block of memory for
29361 non-vectorized case. VAL is the value to set the memory
29362 with. LENGTH is the number of bytes to set. ALIGN is the
29363 alignment of the destination memory in bytes. UNALIGNED_P
29364 is TRUE if we can only set the memory with instructions
29365 meeting alignment requirements. USE_STRD_P is TRUE if we
29366 can use strd to set the memory. */
29368 arm_block_set_non_vect_profit_p (rtx val
,
29369 unsigned HOST_WIDE_INT length
,
29370 unsigned HOST_WIDE_INT align
,
29371 bool unaligned_p
, bool use_strd_p
)
29374 /* For leftovers in bytes of 0-7, we can set the memory block using
29375 strb/strh/str with minimum instruction number. */
29376 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29380 num
= arm_const_inline_cost (SET
, val
);
29381 num
+= length
/ align
+ length
% align
;
29383 else if (use_strd_p
)
29385 num
= arm_const_double_inline_cost (val
);
29386 num
+= (length
>> 3) + leftover
[length
& 7];
29390 num
= arm_const_inline_cost (SET
, val
);
29391 num
+= (length
>> 2) + leftover
[length
& 3];
29394 /* We may be able to combine last pair STRH/STRB into a single STR
29395 by shifting one byte back. */
29396 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29399 return (num
<= arm_block_set_max_insns ());
29402 /* Return TRUE if it's profitable to set block of memory for
29403 vectorized case. LENGTH is the number of bytes to set.
29404 ALIGN is the alignment of destination memory in bytes.
29405 MODE is the vector mode used to set the memory. */
29407 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
29408 unsigned HOST_WIDE_INT align
,
29412 bool unaligned_p
= ((align
& 3) != 0);
29413 unsigned int nelt
= GET_MODE_NUNITS (mode
);
29415 /* Instruction loading constant value. */
29417 /* Instructions storing the memory. */
29418 num
+= (length
+ nelt
- 1) / nelt
;
29419 /* Instructions adjusting the address expression. Only need to
29420 adjust address expression if it's 4 bytes aligned and bytes
29421 leftover can only be stored by mis-aligned store instruction. */
29422 if (!unaligned_p
&& (length
& 3) != 0)
29425 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29426 if (!unaligned_p
&& mode
== V16QImode
)
29429 return (num
<= arm_block_set_max_insns ());
29432 /* Set a block of memory using vectorization instructions for the
29433 unaligned case. We fill the first LENGTH bytes of the memory
29434 area starting from DSTBASE with byte constant VALUE. ALIGN is
29435 the alignment requirement of memory. Return TRUE if succeeded. */
29437 arm_block_set_unaligned_vect (rtx dstbase
,
29438 unsigned HOST_WIDE_INT length
,
29439 unsigned HOST_WIDE_INT value
,
29440 unsigned HOST_WIDE_INT align
)
29442 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
29444 rtx val_elt
, val_vec
, reg
;
29445 rtx rval
[MAX_VECT_LEN
];
29446 rtx (*gen_func
) (rtx
, rtx
);
29448 unsigned HOST_WIDE_INT v
= value
;
29449 unsigned int offset
= 0;
29450 gcc_assert ((align
& 0x3) != 0);
29451 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29452 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29453 if (length
>= nelt_v16
)
29456 gen_func
= gen_movmisalignv16qi
;
29461 gen_func
= gen_movmisalignv8qi
;
29463 nelt_mode
= GET_MODE_NUNITS (mode
);
29464 gcc_assert (length
>= nelt_mode
);
29465 /* Skip if it isn't profitable. */
29466 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29469 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29470 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29472 v
= sext_hwi (v
, BITS_PER_WORD
);
29473 val_elt
= GEN_INT (v
);
29474 for (j
= 0; j
< nelt_mode
; j
++)
29477 reg
= gen_reg_rtx (mode
);
29478 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29479 /* Emit instruction loading the constant value. */
29480 emit_move_insn (reg
, val_vec
);
29482 /* Handle nelt_mode bytes in a vector. */
29483 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29485 emit_insn ((*gen_func
) (mem
, reg
));
29486 if (i
+ 2 * nelt_mode
<= length
)
29488 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
29489 offset
+= nelt_mode
;
29490 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29494 /* If there are not less than nelt_v8 bytes leftover, we must be in
29496 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
29498 /* Handle (8, 16) bytes leftover. */
29499 if (i
+ nelt_v8
< length
)
29501 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
29502 offset
+= length
- i
;
29503 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29505 /* We are shifting bytes back, set the alignment accordingly. */
29506 if ((length
& 1) != 0 && align
>= 2)
29507 set_mem_align (mem
, BITS_PER_UNIT
);
29509 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29511 /* Handle (0, 8] bytes leftover. */
29512 else if (i
< length
&& i
+ nelt_v8
>= length
)
29514 if (mode
== V16QImode
)
29515 reg
= gen_lowpart (V8QImode
, reg
);
29517 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
29518 + (nelt_mode
- nelt_v8
))));
29519 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
29520 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
29522 /* We are shifting bytes back, set the alignment accordingly. */
29523 if ((length
& 1) != 0 && align
>= 2)
29524 set_mem_align (mem
, BITS_PER_UNIT
);
29526 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29532 /* Set a block of memory using vectorization instructions for the
29533 aligned case. We fill the first LENGTH bytes of the memory area
29534 starting from DSTBASE with byte constant VALUE. ALIGN is the
29535 alignment requirement of memory. Return TRUE if succeeded. */
29537 arm_block_set_aligned_vect (rtx dstbase
,
29538 unsigned HOST_WIDE_INT length
,
29539 unsigned HOST_WIDE_INT value
,
29540 unsigned HOST_WIDE_INT align
)
29542 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
29543 rtx dst
, addr
, mem
;
29544 rtx val_elt
, val_vec
, reg
;
29545 rtx rval
[MAX_VECT_LEN
];
29547 unsigned HOST_WIDE_INT v
= value
;
29548 unsigned int offset
= 0;
29550 gcc_assert ((align
& 0x3) == 0);
29551 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29552 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29553 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
29558 nelt_mode
= GET_MODE_NUNITS (mode
);
29559 gcc_assert (length
>= nelt_mode
);
29560 /* Skip if it isn't profitable. */
29561 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29564 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29566 v
= sext_hwi (v
, BITS_PER_WORD
);
29567 val_elt
= GEN_INT (v
);
29568 for (j
= 0; j
< nelt_mode
; j
++)
29571 reg
= gen_reg_rtx (mode
);
29572 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29573 /* Emit instruction loading the constant value. */
29574 emit_move_insn (reg
, val_vec
);
29577 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29578 if (mode
== V16QImode
)
29580 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29581 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29583 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29584 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
29586 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29587 offset
+= length
- nelt_mode
;
29588 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29589 /* We are shifting bytes back, set the alignment accordingly. */
29590 if ((length
& 0x3) == 0)
29591 set_mem_align (mem
, BITS_PER_UNIT
* 4);
29592 else if ((length
& 0x1) == 0)
29593 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29595 set_mem_align (mem
, BITS_PER_UNIT
);
29597 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29600 /* Fall through for bytes leftover. */
29602 nelt_mode
= GET_MODE_NUNITS (mode
);
29603 reg
= gen_lowpart (V8QImode
, reg
);
29606 /* Handle 8 bytes in a vector. */
29607 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29609 addr
= plus_constant (Pmode
, dst
, i
);
29610 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
29611 emit_move_insn (mem
, reg
);
29614 /* Handle single word leftover by shifting 4 bytes back. We can
29615 use aligned access for this case. */
29616 if (i
+ UNITS_PER_WORD
== length
)
29618 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
29619 offset
+= i
- UNITS_PER_WORD
;
29620 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
29621 /* We are shifting 4 bytes back, set the alignment accordingly. */
29622 if (align
> UNITS_PER_WORD
)
29623 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
29625 emit_move_insn (mem
, reg
);
29627 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29628 We have to use unaligned access for this case. */
29629 else if (i
< length
)
29631 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29632 offset
+= length
- nelt_mode
;
29633 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29634 /* We are shifting bytes back, set the alignment accordingly. */
29635 if ((length
& 1) == 0)
29636 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29638 set_mem_align (mem
, BITS_PER_UNIT
);
29640 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29646 /* Set a block of memory using plain strh/strb instructions, only
29647 using instructions allowed by ALIGN on processor. We fill the
29648 first LENGTH bytes of the memory area starting from DSTBASE
29649 with byte constant VALUE. ALIGN is the alignment requirement
29652 arm_block_set_unaligned_non_vect (rtx dstbase
,
29653 unsigned HOST_WIDE_INT length
,
29654 unsigned HOST_WIDE_INT value
,
29655 unsigned HOST_WIDE_INT align
)
29658 rtx dst
, addr
, mem
;
29659 rtx val_exp
, val_reg
, reg
;
29661 HOST_WIDE_INT v
= value
;
29663 gcc_assert (align
== 1 || align
== 2);
29666 v
|= (value
<< BITS_PER_UNIT
);
29668 v
= sext_hwi (v
, BITS_PER_WORD
);
29669 val_exp
= GEN_INT (v
);
29670 /* Skip if it isn't profitable. */
29671 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29672 align
, true, false))
29675 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29676 mode
= (align
== 2 ? HImode
: QImode
);
29677 val_reg
= force_reg (SImode
, val_exp
);
29678 reg
= gen_lowpart (mode
, val_reg
);
29680 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
29682 addr
= plus_constant (Pmode
, dst
, i
);
29683 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
29684 emit_move_insn (mem
, reg
);
29687 /* Handle single byte leftover. */
29688 if (i
+ 1 == length
)
29690 reg
= gen_lowpart (QImode
, val_reg
);
29691 addr
= plus_constant (Pmode
, dst
, i
);
29692 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29693 emit_move_insn (mem
, reg
);
29697 gcc_assert (i
== length
);
29701 /* Set a block of memory using plain strd/str/strh/strb instructions,
29702 to permit unaligned copies on processors which support unaligned
29703 semantics for those instructions. We fill the first LENGTH bytes
29704 of the memory area starting from DSTBASE with byte constant VALUE.
29705 ALIGN is the alignment requirement of memory. */
29707 arm_block_set_aligned_non_vect (rtx dstbase
,
29708 unsigned HOST_WIDE_INT length
,
29709 unsigned HOST_WIDE_INT value
,
29710 unsigned HOST_WIDE_INT align
)
29713 rtx dst
, addr
, mem
;
29714 rtx val_exp
, val_reg
, reg
;
29715 unsigned HOST_WIDE_INT v
;
29718 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
29719 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
29721 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
29722 if (length
< UNITS_PER_WORD
)
29723 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
29726 v
|= (v
<< BITS_PER_WORD
);
29728 v
= sext_hwi (v
, BITS_PER_WORD
);
29730 val_exp
= GEN_INT (v
);
29731 /* Skip if it isn't profitable. */
29732 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29733 align
, false, use_strd_p
))
29738 /* Try without strd. */
29739 v
= (v
>> BITS_PER_WORD
);
29740 v
= sext_hwi (v
, BITS_PER_WORD
);
29741 val_exp
= GEN_INT (v
);
29742 use_strd_p
= false;
29743 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29744 align
, false, use_strd_p
))
29749 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29750 /* Handle double words using strd if possible. */
29753 val_reg
= force_reg (DImode
, val_exp
);
29755 for (; (i
+ 8 <= length
); i
+= 8)
29757 addr
= plus_constant (Pmode
, dst
, i
);
29758 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
29759 emit_move_insn (mem
, reg
);
29763 val_reg
= force_reg (SImode
, val_exp
);
29765 /* Handle words. */
29766 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
29767 for (; (i
+ 4 <= length
); i
+= 4)
29769 addr
= plus_constant (Pmode
, dst
, i
);
29770 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
29771 if ((align
& 3) == 0)
29772 emit_move_insn (mem
, reg
);
29774 emit_insn (gen_unaligned_storesi (mem
, reg
));
29777 /* Merge last pair of STRH and STRB into a STR if possible. */
29778 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
29780 addr
= plus_constant (Pmode
, dst
, i
- 1);
29781 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
29782 /* We are shifting one byte back, set the alignment accordingly. */
29783 if ((align
& 1) == 0)
29784 set_mem_align (mem
, BITS_PER_UNIT
);
29786 /* Most likely this is an unaligned access, and we can't tell at
29787 compilation time. */
29788 emit_insn (gen_unaligned_storesi (mem
, reg
));
29792 /* Handle half word leftover. */
29793 if (i
+ 2 <= length
)
29795 reg
= gen_lowpart (HImode
, val_reg
);
29796 addr
= plus_constant (Pmode
, dst
, i
);
29797 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
29798 if ((align
& 1) == 0)
29799 emit_move_insn (mem
, reg
);
29801 emit_insn (gen_unaligned_storehi (mem
, reg
));
29806 /* Handle single byte leftover. */
29807 if (i
+ 1 == length
)
29809 reg
= gen_lowpart (QImode
, val_reg
);
29810 addr
= plus_constant (Pmode
, dst
, i
);
29811 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29812 emit_move_insn (mem
, reg
);
29818 /* Set a block of memory using vectorization instructions for both
29819 aligned and unaligned cases. We fill the first LENGTH bytes of
29820 the memory area starting from DSTBASE with byte constant VALUE.
29821 ALIGN is the alignment requirement of memory. */
29823 arm_block_set_vect (rtx dstbase
,
29824 unsigned HOST_WIDE_INT length
,
29825 unsigned HOST_WIDE_INT value
,
29826 unsigned HOST_WIDE_INT align
)
29828 /* Check whether we need to use unaligned store instruction. */
29829 if (((align
& 3) != 0 || (length
& 3) != 0)
29830 /* Check whether unaligned store instruction is available. */
29831 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
29834 if ((align
& 3) == 0)
29835 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
29837 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
29840 /* Expand string store operation. Firstly we try to do that by using
29841 vectorization instructions, then try with ARM unaligned access and
29842 double-word store if profitable. OPERANDS[0] is the destination,
29843 OPERANDS[1] is the number of bytes, operands[2] is the value to
29844 initialize the memory, OPERANDS[3] is the known alignment of the
29847 arm_gen_setmem (rtx
*operands
)
29849 rtx dstbase
= operands
[0];
29850 unsigned HOST_WIDE_INT length
;
29851 unsigned HOST_WIDE_INT value
;
29852 unsigned HOST_WIDE_INT align
;
29854 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
29857 length
= UINTVAL (operands
[1]);
29861 value
= (UINTVAL (operands
[2]) & 0xFF);
29862 align
= UINTVAL (operands
[3]);
29863 if (TARGET_NEON
&& length
>= 8
29864 && current_tune
->string_ops_prefer_neon
29865 && arm_block_set_vect (dstbase
, length
, value
, align
))
29868 if (!unaligned_access
&& (align
& 3) != 0)
29869 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
29871 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
29876 arm_macro_fusion_p (void)
29878 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
29883 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
29886 rtx prev_set
= single_set (prev
);
29887 rtx curr_set
= single_set (curr
);
29893 if (any_condjump_p (curr
))
29896 if (!arm_macro_fusion_p ())
29899 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
29900 && aarch_crypto_can_dual_issue (prev
, curr
))
29903 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
)
29905 /* We are trying to fuse
29906 movw imm / movt imm
29907 instructions as a group that gets scheduled together. */
29909 set_dest
= SET_DEST (curr_set
);
29911 if (GET_MODE (set_dest
) != SImode
)
29914 /* We are trying to match:
29915 prev (movw) == (set (reg r0) (const_int imm16))
29916 curr (movt) == (set (zero_extract (reg r0)
29919 (const_int imm16_1))
29921 prev (movw) == (set (reg r1)
29922 (high (symbol_ref ("SYM"))))
29923 curr (movt) == (set (reg r0)
29925 (symbol_ref ("SYM")))) */
29926 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
29928 if (CONST_INT_P (SET_SRC (curr_set
))
29929 && CONST_INT_P (SET_SRC (prev_set
))
29930 && REG_P (XEXP (set_dest
, 0))
29931 && REG_P (SET_DEST (prev_set
))
29932 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
29935 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
29936 && REG_P (SET_DEST (curr_set
))
29937 && REG_P (SET_DEST (prev_set
))
29938 && GET_CODE (SET_SRC (prev_set
)) == HIGH
29939 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
29945 /* Return true iff the instruction fusion described by OP is enabled. */
29947 arm_fusion_enabled_p (tune_params::fuse_ops op
)
29949 return current_tune
->fusible_ops
& op
;
29952 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29954 static unsigned HOST_WIDE_INT
29955 arm_asan_shadow_offset (void)
29957 return HOST_WIDE_INT_1U
<< 29;
29961 /* This is a temporary fix for PR60655. Ideally we need
29962 to handle most of these cases in the generic part but
29963 currently we reject minus (..) (sym_ref). We try to
29964 ameliorate the case with minus (sym_ref1) (sym_ref2)
29965 where they are in the same section. */
29968 arm_const_not_ok_for_debug_p (rtx p
)
29970 tree decl_op0
= NULL
;
29971 tree decl_op1
= NULL
;
29973 if (GET_CODE (p
) == MINUS
)
29975 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
29977 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
29979 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
29980 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
29982 if ((TREE_CODE (decl_op1
) == VAR_DECL
29983 || TREE_CODE (decl_op1
) == CONST_DECL
)
29984 && (TREE_CODE (decl_op0
) == VAR_DECL
29985 || TREE_CODE (decl_op0
) == CONST_DECL
))
29986 return (get_variable_section (decl_op1
, false)
29987 != get_variable_section (decl_op0
, false));
29989 if (TREE_CODE (decl_op1
) == LABEL_DECL
29990 && TREE_CODE (decl_op0
) == LABEL_DECL
)
29991 return (DECL_CONTEXT (decl_op1
)
29992 != DECL_CONTEXT (decl_op0
));
30002 /* return TRUE if x is a reference to a value in a constant pool */
30004 arm_is_constant_pool_ref (rtx x
)
30007 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
30008 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
30011 /* Remember the last target of arm_set_current_function. */
30012 static GTY(()) tree arm_previous_fndecl
;
30014 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30017 save_restore_target_globals (tree new_tree
)
30019 /* If we have a previous state, use it. */
30020 if (TREE_TARGET_GLOBALS (new_tree
))
30021 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
30022 else if (new_tree
== target_option_default_node
)
30023 restore_target_globals (&default_target_globals
);
30026 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30027 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
30030 arm_option_params_internal ();
30033 /* Invalidate arm_previous_fndecl. */
30036 arm_reset_previous_fndecl (void)
30038 arm_previous_fndecl
= NULL_TREE
;
30041 /* Establish appropriate back-end context for processing the function
30042 FNDECL. The argument might be NULL to indicate processing at top
30043 level, outside of any function scope. */
30046 arm_set_current_function (tree fndecl
)
30048 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
30051 tree old_tree
= (arm_previous_fndecl
30052 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
30055 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30057 /* If current function has no attributes but previous one did,
30058 use the default node. */
30059 if (! new_tree
&& old_tree
)
30060 new_tree
= target_option_default_node
;
30062 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30063 the default have been handled by save_restore_target_globals from
30064 arm_pragma_target_parse. */
30065 if (old_tree
== new_tree
)
30068 arm_previous_fndecl
= fndecl
;
30070 /* First set the target options. */
30071 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
30073 save_restore_target_globals (new_tree
);
30076 /* Implement TARGET_OPTION_PRINT. */
30079 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
30081 int flags
= ptr
->x_target_flags
;
30082 const struct arm_fpu_desc
*fpu_desc
= &all_fpus
[ptr
->x_arm_fpu_index
];
30084 fprintf (file
, "%*sselected arch %s\n", indent
, "",
30085 TARGET_THUMB2_P (flags
) ? "thumb2" :
30086 TARGET_THUMB_P (flags
) ? "thumb1" :
30089 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_desc
->name
);
30092 /* Hook to determine if one function can safely inline another. */
30095 arm_can_inline_p (tree caller
, tree callee
)
30097 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30098 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30100 struct cl_target_option
*caller_opts
30101 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30102 : target_option_default_node
);
30104 struct cl_target_option
*callee_opts
30105 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30106 : target_option_default_node
);
30108 const struct arm_fpu_desc
*caller_fpu
30109 = &all_fpus
[caller_opts
->x_arm_fpu_index
];
30110 const struct arm_fpu_desc
*callee_fpu
30111 = &all_fpus
[callee_opts
->x_arm_fpu_index
];
30113 /* Callee's fpu features should be a subset of the caller's. */
30114 if ((caller_fpu
->features
& callee_fpu
->features
) != callee_fpu
->features
)
30117 /* Need same model and regs. */
30118 if (callee_fpu
->model
!= caller_fpu
->model
30119 || callee_fpu
->regs
!= callee_fpu
->regs
)
30122 /* OK to inline between different modes.
30123 Function with mode specific instructions, e.g using asm,
30124 must be explicitly protected with noinline. */
30128 /* Hook to fix function's alignment affected by target attribute. */
30131 arm_relayout_function (tree fndecl
)
30133 if (DECL_USER_ALIGN (fndecl
))
30136 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30139 callee_tree
= target_option_default_node
;
30141 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30142 SET_DECL_ALIGN (fndecl
, FUNCTION_BOUNDARY_P (opts
->x_target_flags
));
30145 /* Inner function to process the attribute((target(...))), take an argument and
30146 set the current options from the argument. If we have a list, recursively
30147 go over the list. */
30150 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
30152 if (TREE_CODE (args
) == TREE_LIST
)
30156 for (; args
; args
= TREE_CHAIN (args
))
30157 if (TREE_VALUE (args
)
30158 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30163 else if (TREE_CODE (args
) != STRING_CST
)
30165 error ("attribute %<target%> argument not a string");
30169 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30172 while ((q
= strtok (argstr
, ",")) != NULL
)
30174 while (ISSPACE (*q
)) ++q
;
30177 if (!strncmp (q
, "thumb", 5))
30178 opts
->x_target_flags
|= MASK_THUMB
;
30180 else if (!strncmp (q
, "arm", 3))
30181 opts
->x_target_flags
&= ~MASK_THUMB
;
30183 else if (!strncmp (q
, "fpu=", 4))
30185 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
30186 &opts
->x_arm_fpu_index
, CL_TARGET
))
30188 error ("invalid fpu for attribute(target(\"%s\"))", q
);
30194 error ("attribute(target(\"%s\")) is unknown", q
);
30198 arm_option_check_internal (opts
);
30204 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30207 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
30208 struct gcc_options
*opts_set
)
30210 if (!arm_valid_target_attribute_rec (args
, opts
))
30213 /* Do any overrides, such as global options arch=xxx. */
30214 arm_option_override_internal (opts
, opts_set
);
30216 return build_target_option_node (opts
);
30220 add_attribute (const char * mode
, tree
*attributes
)
30222 size_t len
= strlen (mode
);
30223 tree value
= build_string (len
, mode
);
30225 TREE_TYPE (value
) = build_array_type (char_type_node
,
30226 build_index_type (size_int (len
)));
30228 *attributes
= tree_cons (get_identifier ("target"),
30229 build_tree_list (NULL_TREE
, value
),
30233 /* For testing. Insert thumb or arm modes alternatively on functions. */
30236 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30240 if (! TARGET_FLIP_THUMB
)
30243 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30244 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30247 /* Nested definitions must inherit mode. */
30248 if (current_function_decl
)
30250 mode
= TARGET_THUMB
? "thumb" : "arm";
30251 add_attribute (mode
, attributes
);
30255 /* If there is already a setting don't change it. */
30256 if (lookup_attribute ("target", *attributes
) != NULL
)
30259 mode
= thumb_flipper
? "thumb" : "arm";
30260 add_attribute (mode
, attributes
);
30262 thumb_flipper
= !thumb_flipper
;
30265 /* Hook to validate attribute((target("string"))). */
30268 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30269 tree args
, int ARG_UNUSED (flags
))
30272 struct gcc_options func_options
;
30273 tree cur_tree
, new_optimize
;
30274 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
30276 /* Get the optimization options of the current function. */
30277 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
30279 /* If the function changed the optimization levels as well as setting target
30280 options, start with the optimizations specified. */
30281 if (!func_optimize
)
30282 func_optimize
= optimization_default_node
;
30284 /* Init func_options. */
30285 memset (&func_options
, 0, sizeof (func_options
));
30286 init_options_struct (&func_options
, NULL
);
30287 lang_hooks
.init_options_struct (&func_options
);
30289 /* Initialize func_options to the defaults. */
30290 cl_optimization_restore (&func_options
,
30291 TREE_OPTIMIZATION (func_optimize
));
30293 cl_target_option_restore (&func_options
,
30294 TREE_TARGET_OPTION (target_option_default_node
));
30296 /* Set func_options flags with new target mode. */
30297 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
30298 &global_options_set
);
30300 if (cur_tree
== NULL_TREE
)
30303 new_optimize
= build_optimization_node (&func_options
);
30305 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30307 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30309 finalize_options_struct (&func_options
);
30315 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
30318 fprintf (stream
, "\t.syntax unified\n");
30322 if (is_called_in_ARM_mode (decl
)
30323 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
30324 && cfun
->is_thunk
))
30325 fprintf (stream
, "\t.code 32\n");
30326 else if (TARGET_THUMB1
)
30327 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
30329 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
30332 fprintf (stream
, "\t.arm\n");
30334 asm_fprintf (asm_out_file
, "\t.fpu %s\n",
30335 TARGET_SOFT_FLOAT
? "softvfp" : TARGET_FPU_NAME
);
30337 if (TARGET_POKE_FUNCTION_NAME
)
30338 arm_poke_function_name (stream
, (const char *) name
);
30341 /* If MEM is in the form of [base+offset], extract the two parts
30342 of address and set to BASE and OFFSET, otherwise return false
30343 after clearing BASE and OFFSET. */
30346 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
30350 gcc_assert (MEM_P (mem
));
30352 addr
= XEXP (mem
, 0);
30354 /* Strip off const from addresses like (const (addr)). */
30355 if (GET_CODE (addr
) == CONST
)
30356 addr
= XEXP (addr
, 0);
30358 if (GET_CODE (addr
) == REG
)
30361 *offset
= const0_rtx
;
30365 if (GET_CODE (addr
) == PLUS
30366 && GET_CODE (XEXP (addr
, 0)) == REG
30367 && CONST_INT_P (XEXP (addr
, 1)))
30369 *base
= XEXP (addr
, 0);
30370 *offset
= XEXP (addr
, 1);
30375 *offset
= NULL_RTX
;
30380 /* If INSN is a load or store of address in the form of [base+offset],
30381 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30382 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30383 otherwise return FALSE. */
30386 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
30390 gcc_assert (INSN_P (insn
));
30391 x
= PATTERN (insn
);
30392 if (GET_CODE (x
) != SET
)
30396 dest
= SET_DEST (x
);
30397 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
30400 extract_base_offset_in_addr (dest
, base
, offset
);
30402 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
30405 extract_base_offset_in_addr (src
, base
, offset
);
30410 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
30413 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30415 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30416 and PRI are only calculated for these instructions. For other instruction,
30417 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30418 instruction fusion can be supported by returning different priorities.
30420 It's important that irrelevant instructions get the largest FUSION_PRI. */
30423 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
30424 int *fusion_pri
, int *pri
)
30430 gcc_assert (INSN_P (insn
));
30433 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
30440 /* Load goes first. */
30442 *fusion_pri
= tmp
- 1;
30444 *fusion_pri
= tmp
- 2;
30448 /* INSN with smaller base register goes first. */
30449 tmp
-= ((REGNO (base
) & 0xff) << 20);
30451 /* INSN with smaller offset goes first. */
30452 off_val
= (int)(INTVAL (offset
));
30454 tmp
-= (off_val
& 0xfffff);
30456 tmp
+= ((- off_val
) & 0xfffff);
30463 /* Construct and return a PARALLEL RTX vector with elements numbering the
30464 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30465 the vector - from the perspective of the architecture. This does not
30466 line up with GCC's perspective on lane numbers, so we end up with
30467 different masks depending on our target endian-ness. The diagram
30468 below may help. We must draw the distinction when building masks
30469 which select one half of the vector. An instruction selecting
30470 architectural low-lanes for a big-endian target, must be described using
30471 a mask selecting GCC high-lanes.
30473 Big-Endian Little-Endian
30475 GCC 0 1 2 3 3 2 1 0
30476 | x | x | x | x | | x | x | x | x |
30477 Architecture 3 2 1 0 3 2 1 0
30479 Low Mask: { 2, 3 } { 0, 1 }
30480 High Mask: { 0, 1 } { 2, 3 }
30484 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
30486 int nunits
= GET_MODE_NUNITS (mode
);
30487 rtvec v
= rtvec_alloc (nunits
/ 2);
30488 int high_base
= nunits
/ 2;
30494 if (BYTES_BIG_ENDIAN
)
30495 base
= high
? low_base
: high_base
;
30497 base
= high
? high_base
: low_base
;
30499 for (i
= 0; i
< nunits
/ 2; i
++)
30500 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
30502 t1
= gen_rtx_PARALLEL (mode
, v
);
30506 /* Check OP for validity as a PARALLEL RTX vector with elements
30507 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30508 from the perspective of the architecture. See the diagram above
30509 arm_simd_vect_par_cnst_half_p for more details. */
30512 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
30515 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
30516 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
30517 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
30520 if (!VECTOR_MODE_P (mode
))
30523 if (count_op
!= count_ideal
)
30526 for (i
= 0; i
< count_ideal
; i
++)
30528 rtx elt_op
= XVECEXP (op
, 0, i
);
30529 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
30531 if (!CONST_INT_P (elt_op
)
30532 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
30538 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30541 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
30544 /* For now, we punt and not handle this for TARGET_THUMB1. */
30545 if (vcall_offset
&& TARGET_THUMB1
)
30548 /* Otherwise ok. */
30552 #include "gt-arm.h"