1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
33 #include "stringpool.h"
39 #include "diagnostic-core.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
46 #include "insn-attr.h"
52 #include "sched-int.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
60 #include "target-globals.h"
62 #include "tm-constrs.h"
65 /* This file should be included last. */
66 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook, set by the language front end, used to emit language-specific
   object attributes; NULL (zero-initialized) when the front end does not
   install one.  */
void (*arm_lang_output_object_attributes_hook)(void);
79 /* Forward function declarations. */
80 static bool arm_const_not_ok_for_debug_p (rtx
);
81 static bool arm_needs_doubleword_align (machine_mode
, const_tree
);
82 static int arm_compute_static_chain_stack_bytes (void);
83 static arm_stack_offsets
*arm_get_frame_offsets (void);
84 static void arm_add_gc_roots (void);
85 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
86 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
87 static unsigned bit_count (unsigned long);
88 static unsigned feature_count (const arm_feature_set
*);
89 static int arm_address_register_rtx_p (rtx
, int);
90 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
91 static bool is_called_in_ARM_mode (tree
);
92 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
93 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
94 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
95 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
96 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
97 inline static int thumb1_index_register_rtx_p (rtx
, int);
98 static int thumb_far_jump_used_p (void);
99 static bool thumb_force_lr_save (void);
100 static unsigned arm_size_return_regs (void);
101 static bool arm_assemble_integer (rtx
, unsigned int, int);
102 static void arm_print_operand (FILE *, rtx
, int);
103 static void arm_print_operand_address (FILE *, rtx
);
104 static bool arm_print_operand_punct_valid_p (unsigned char code
);
105 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
106 static arm_cc
get_arm_condition_code (rtx
);
107 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
108 static const char *output_multi_immediate (rtx
*, const char *, const char *,
110 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
111 static struct machine_function
*arm_init_machine_status (void);
112 static void thumb_exit (FILE *, int);
113 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
114 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
115 static Mnode
*add_minipool_forward_ref (Mfix
*);
116 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
117 static Mnode
*add_minipool_backward_ref (Mfix
*);
118 static void assign_minipool_offsets (Mfix
*);
119 static void arm_print_value (FILE *, rtx
);
120 static void dump_minipool (rtx_insn
*);
121 static int arm_barrier_cost (rtx_insn
*);
122 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
123 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
124 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
126 static void arm_reorg (void);
127 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
128 static unsigned long arm_compute_save_reg0_reg12_mask (void);
129 static unsigned long arm_compute_save_reg_mask (void);
130 static unsigned long arm_isr_value (tree
);
131 static unsigned long arm_compute_func_type (void);
132 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
133 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
134 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
135 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
136 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
138 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
139 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
140 static int arm_comp_type_attributes (const_tree
, const_tree
);
141 static void arm_set_default_type_attributes (tree
);
142 static int arm_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int);
143 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
144 static int optimal_immediate_sequence (enum rtx_code code
,
145 unsigned HOST_WIDE_INT val
,
146 struct four_ints
*return_sequence
);
147 static int optimal_immediate_sequence_1 (enum rtx_code code
,
148 unsigned HOST_WIDE_INT val
,
149 struct four_ints
*return_sequence
,
151 static int arm_get_strip_length (int);
152 static bool arm_function_ok_for_sibcall (tree
, tree
);
153 static machine_mode
arm_promote_function_mode (const_tree
,
156 static bool arm_return_in_memory (const_tree
, const_tree
);
157 static rtx
arm_function_value (const_tree
, const_tree
, bool);
158 static rtx
arm_libcall_value_1 (machine_mode
);
159 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
160 static bool arm_function_value_regno_p (const unsigned int);
161 static void arm_internal_label (FILE *, const char *, unsigned long);
162 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
164 static bool arm_have_conditional_execution (void);
165 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
166 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
167 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
168 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
169 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
170 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
171 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
172 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
173 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
174 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
175 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
176 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
177 static void emit_constant_insn (rtx cond
, rtx pattern
);
178 static rtx_insn
*emit_set_insn (rtx
, rtx
);
179 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
180 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
182 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
184 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
186 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
187 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
189 static rtx
aapcs_libcall_value (machine_mode
);
190 static int aapcs_select_return_coproc (const_tree
, const_tree
);
192 #ifdef OBJECT_FORMAT_ELF
193 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
194 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
197 static void arm_encode_section_info (tree
, rtx
, int);
200 static void arm_file_end (void);
201 static void arm_file_start (void);
202 static void arm_insert_attributes (tree
, tree
*);
204 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
206 static bool arm_pass_by_reference (cumulative_args_t
,
207 machine_mode
, const_tree
, bool);
208 static bool arm_promote_prototypes (const_tree
);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree
);
212 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
213 static bool arm_return_in_memory (const_tree
, const_tree
);
215 static void arm_unwind_emit (FILE *, rtx_insn
*);
216 static bool arm_output_ttype (rtx
);
217 static void arm_asm_emit_except_personality (rtx
);
218 static void arm_asm_init_sections (void);
220 static rtx
arm_dwarf_register_span (rtx
);
222 static tree
arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree
arm_get_cookie_size (tree
);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree
);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree
arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree
, rtx
);
234 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
235 static void arm_option_override (void);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option
*);
238 static void arm_set_current_function (tree
);
239 static bool arm_can_inline_p (tree
, tree
);
240 static void arm_relayout_function (tree
);
241 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
242 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
243 static bool arm_macro_fusion_p (void);
244 static bool arm_cannot_copy_insn_p (rtx_insn
*);
245 static int arm_issue_rate (void);
246 static int arm_first_cycle_multipass_dfa_lookahead (void);
247 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
248 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
249 static bool arm_output_addr_const_extra (FILE *, rtx
);
250 static bool arm_allocate_stack_slots_for_args (void);
251 static bool arm_warn_func_return (tree
);
252 static const char *arm_invalid_parameter_type (const_tree t
);
253 static const char *arm_invalid_return_type (const_tree t
);
254 static tree
arm_promoted_type (const_tree t
);
255 static tree
arm_convert_to_type (tree type
, tree expr
);
256 static bool arm_scalar_mode_supported_p (machine_mode
);
257 static bool arm_frame_pointer_required (void);
258 static bool arm_can_eliminate (const int, const int);
259 static void arm_asm_trampoline_template (FILE *);
260 static void arm_trampoline_init (rtx
, tree
, rtx
);
261 static rtx
arm_trampoline_adjust_address (rtx
);
262 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
263 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
264 static bool xscale_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
265 static bool fa726te_sched_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int *);
266 static bool arm_array_mode_supported_p (machine_mode
,
267 unsigned HOST_WIDE_INT
);
268 static machine_mode
arm_preferred_simd_mode (machine_mode
);
269 static bool arm_class_likely_spilled_p (reg_class_t
);
270 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
271 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
272 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
276 static void arm_conditional_register_usage (void);
277 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
278 static unsigned int arm_autovectorize_vector_sizes (void);
279 static int arm_default_branch_cost (bool, bool);
280 static int arm_cortex_a5_branch_cost (bool, bool);
281 static int arm_cortex_m_branch_cost (bool, bool);
282 static int arm_cortex_m7_branch_cost (bool, bool);
284 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
285 const unsigned char *sel
);
287 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
289 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
291 int misalign ATTRIBUTE_UNUSED
);
292 static unsigned arm_add_stmt_cost (void *data
, int count
,
293 enum vect_cost_for_stmt kind
,
294 struct _stmt_vec_info
*stmt_info
,
296 enum vect_cost_model_location where
);
298 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
299 bool op0_preserve_value
);
300 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
302 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
304 /* Table of machine attributes. */
305 static const struct attribute_spec arm_attribute_table
[] =
307 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
308 affects_type_identity } */
309 /* Function calls made to this symbol must be done indirectly, because
310 it may lie outside of the 26 bit addressing range of a normal function
312 { "long_call", 0, 0, false, true, true, NULL
, false },
313 /* Whereas these functions are always known to reside within the 26 bit
315 { "short_call", 0, 0, false, true, true, NULL
, false },
316 /* Specify the procedure call conventions for a function. */
317 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
319 /* Interrupt Service Routines have special prologue and epilogue requirements. */
320 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
322 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
324 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
327 /* ARM/PE has three new attributes:
329 dllexport - for exporting a function/variable that will live in a dll
330 dllimport - for importing a function/variable from a dll
332 Microsoft allows multiple declspecs in one __declspec, separating
333 them with spaces. We do NOT support this. Instead, use __declspec
336 { "dllimport", 0, 0, true, false, false, NULL
, false },
337 { "dllexport", 0, 0, true, false, false, NULL
, false },
338 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
340 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
343 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
346 { NULL
, 0, 0, false, false, false, NULL
, false }
349 /* Initialize the GCC target structure. */
350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
351 #undef TARGET_MERGE_DECL_ATTRIBUTES
352 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
355 #undef TARGET_LEGITIMIZE_ADDRESS
356 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
359 #define TARGET_LRA_P hook_bool_void_true
361 #undef TARGET_ATTRIBUTE_TABLE
362 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
364 #undef TARGET_INSERT_ATTRIBUTES
365 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
367 #undef TARGET_ASM_FILE_START
368 #define TARGET_ASM_FILE_START arm_file_start
369 #undef TARGET_ASM_FILE_END
370 #define TARGET_ASM_FILE_END arm_file_end
372 #undef TARGET_ASM_ALIGNED_SI_OP
373 #define TARGET_ASM_ALIGNED_SI_OP NULL
374 #undef TARGET_ASM_INTEGER
375 #define TARGET_ASM_INTEGER arm_assemble_integer
377 #undef TARGET_PRINT_OPERAND
378 #define TARGET_PRINT_OPERAND arm_print_operand
379 #undef TARGET_PRINT_OPERAND_ADDRESS
380 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
381 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
382 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
384 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
385 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
387 #undef TARGET_ASM_FUNCTION_PROLOGUE
388 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
390 #undef TARGET_ASM_FUNCTION_EPILOGUE
391 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
393 #undef TARGET_CAN_INLINE_P
394 #define TARGET_CAN_INLINE_P arm_can_inline_p
396 #undef TARGET_RELAYOUT_FUNCTION
397 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE arm_option_override
402 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
403 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
405 #undef TARGET_OPTION_PRINT
406 #define TARGET_OPTION_PRINT arm_option_print
408 #undef TARGET_COMP_TYPE_ATTRIBUTES
409 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
411 #undef TARGET_SCHED_MACRO_FUSION_P
412 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
414 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
415 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
417 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
418 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
420 #undef TARGET_SCHED_ADJUST_COST
421 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
423 #undef TARGET_SET_CURRENT_FUNCTION
424 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
426 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
427 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
429 #undef TARGET_SCHED_REORDER
430 #define TARGET_SCHED_REORDER arm_sched_reorder
432 #undef TARGET_REGISTER_MOVE_COST
433 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
435 #undef TARGET_MEMORY_MOVE_COST
436 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
438 #undef TARGET_ENCODE_SECTION_INFO
440 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
442 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
445 #undef TARGET_STRIP_NAME_ENCODING
446 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
448 #undef TARGET_ASM_INTERNAL_LABEL
449 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
451 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
452 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
454 #undef TARGET_FUNCTION_VALUE
455 #define TARGET_FUNCTION_VALUE arm_function_value
457 #undef TARGET_LIBCALL_VALUE
458 #define TARGET_LIBCALL_VALUE arm_libcall_value
460 #undef TARGET_FUNCTION_VALUE_REGNO_P
461 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
463 #undef TARGET_ASM_OUTPUT_MI_THUNK
464 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
465 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
466 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
468 #undef TARGET_RTX_COSTS
469 #define TARGET_RTX_COSTS arm_rtx_costs
470 #undef TARGET_ADDRESS_COST
471 #define TARGET_ADDRESS_COST arm_address_cost
473 #undef TARGET_SHIFT_TRUNCATION_MASK
474 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
475 #undef TARGET_VECTOR_MODE_SUPPORTED_P
476 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
477 #undef TARGET_ARRAY_MODE_SUPPORTED_P
478 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
479 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
480 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
481 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
482 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
483 arm_autovectorize_vector_sizes
485 #undef TARGET_MACHINE_DEPENDENT_REORG
486 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
488 #undef TARGET_INIT_BUILTINS
489 #define TARGET_INIT_BUILTINS arm_init_builtins
490 #undef TARGET_EXPAND_BUILTIN
491 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
492 #undef TARGET_BUILTIN_DECL
493 #define TARGET_BUILTIN_DECL arm_builtin_decl
495 #undef TARGET_INIT_LIBFUNCS
496 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
498 #undef TARGET_PROMOTE_FUNCTION_MODE
499 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
500 #undef TARGET_PROMOTE_PROTOTYPES
501 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
502 #undef TARGET_PASS_BY_REFERENCE
503 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
504 #undef TARGET_ARG_PARTIAL_BYTES
505 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
506 #undef TARGET_FUNCTION_ARG
507 #define TARGET_FUNCTION_ARG arm_function_arg
508 #undef TARGET_FUNCTION_ARG_ADVANCE
509 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
510 #undef TARGET_FUNCTION_ARG_BOUNDARY
511 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
513 #undef TARGET_SETUP_INCOMING_VARARGS
514 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
516 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
517 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
519 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
520 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
521 #undef TARGET_TRAMPOLINE_INIT
522 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
523 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
524 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
526 #undef TARGET_WARN_FUNC_RETURN
527 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
529 #undef TARGET_DEFAULT_SHORT_ENUMS
530 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
532 #undef TARGET_ALIGN_ANON_BITFIELD
533 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
535 #undef TARGET_NARROW_VOLATILE_BITFIELD
536 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
538 #undef TARGET_CXX_GUARD_TYPE
539 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
541 #undef TARGET_CXX_GUARD_MASK_BIT
542 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
544 #undef TARGET_CXX_GET_COOKIE_SIZE
545 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
547 #undef TARGET_CXX_COOKIE_HAS_SIZE
548 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
550 #undef TARGET_CXX_CDTOR_RETURNS_THIS
551 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
553 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
554 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
556 #undef TARGET_CXX_USE_AEABI_ATEXIT
557 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
559 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
560 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
561 arm_cxx_determine_class_data_visibility
563 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
564 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
566 #undef TARGET_RETURN_IN_MSB
567 #define TARGET_RETURN_IN_MSB arm_return_in_msb
569 #undef TARGET_RETURN_IN_MEMORY
570 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
572 #undef TARGET_MUST_PASS_IN_STACK
573 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
576 #undef TARGET_ASM_UNWIND_EMIT
577 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
579 /* EABI unwinding tables use a different format for the typeinfo tables. */
580 #undef TARGET_ASM_TTYPE
581 #define TARGET_ASM_TTYPE arm_output_ttype
583 #undef TARGET_ARM_EABI_UNWINDER
584 #define TARGET_ARM_EABI_UNWINDER true
586 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
587 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
589 #undef TARGET_ASM_INIT_SECTIONS
590 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
591 #endif /* ARM_UNWIND_INFO */
593 #undef TARGET_DWARF_REGISTER_SPAN
594 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
596 #undef TARGET_CANNOT_COPY_INSN_P
597 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
600 #undef TARGET_HAVE_TLS
601 #define TARGET_HAVE_TLS true
604 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
605 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
607 #undef TARGET_LEGITIMATE_CONSTANT_P
608 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
610 #undef TARGET_CANNOT_FORCE_CONST_MEM
611 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
613 #undef TARGET_MAX_ANCHOR_OFFSET
614 #define TARGET_MAX_ANCHOR_OFFSET 4095
616 /* The minimum is set such that the total size of the block
617 for a particular anchor is -4088 + 1 + 4095 bytes, which is
618 divisible by eight, ensuring natural spacing of anchors. */
619 #undef TARGET_MIN_ANCHOR_OFFSET
620 #define TARGET_MIN_ANCHOR_OFFSET -4088
622 #undef TARGET_SCHED_ISSUE_RATE
623 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
625 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
626 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
627 arm_first_cycle_multipass_dfa_lookahead
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
631 arm_first_cycle_multipass_dfa_lookahead_guard
633 #undef TARGET_MANGLE_TYPE
634 #define TARGET_MANGLE_TYPE arm_mangle_type
636 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
637 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
639 #undef TARGET_BUILD_BUILTIN_VA_LIST
640 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
641 #undef TARGET_EXPAND_BUILTIN_VA_START
642 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
643 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
644 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
647 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
648 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
651 #undef TARGET_LEGITIMATE_ADDRESS_P
652 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
654 #undef TARGET_PREFERRED_RELOAD_CLASS
655 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
657 #undef TARGET_INVALID_PARAMETER_TYPE
658 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
660 #undef TARGET_INVALID_RETURN_TYPE
661 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
663 #undef TARGET_PROMOTED_TYPE
664 #define TARGET_PROMOTED_TYPE arm_promoted_type
666 #undef TARGET_CONVERT_TO_TYPE
667 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
669 #undef TARGET_SCALAR_MODE_SUPPORTED_P
670 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
672 #undef TARGET_FRAME_POINTER_REQUIRED
673 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
675 #undef TARGET_CAN_ELIMINATE
676 #define TARGET_CAN_ELIMINATE arm_can_eliminate
678 #undef TARGET_CONDITIONAL_REGISTER_USAGE
679 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
681 #undef TARGET_CLASS_LIKELY_SPILLED_P
682 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
684 #undef TARGET_VECTORIZE_BUILTINS
685 #define TARGET_VECTORIZE_BUILTINS
687 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
688 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
689 arm_builtin_vectorized_function
691 #undef TARGET_VECTOR_ALIGNMENT
692 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
694 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
695 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
696 arm_vector_alignment_reachable
698 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
699 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
700 arm_builtin_support_vector_misalignment
702 #undef TARGET_PREFERRED_RENAME_CLASS
703 #define TARGET_PREFERRED_RENAME_CLASS \
704 arm_preferred_rename_class
706 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
707 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
708 arm_vectorize_vec_perm_const_ok
710 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
711 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
712 arm_builtin_vectorization_cost
713 #undef TARGET_VECTORIZE_ADD_STMT_COST
714 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
716 #undef TARGET_CANONICALIZE_COMPARISON
717 #define TARGET_CANONICALIZE_COMPARISON \
718 arm_canonicalize_comparison
720 #undef TARGET_ASAN_SHADOW_OFFSET
721 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
723 #undef MAX_INSN_PER_IT_BLOCK
724 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
726 #undef TARGET_CAN_USE_DOLOOP_P
727 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
729 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
730 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
732 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
733 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
735 #undef TARGET_SCHED_FUSION_PRIORITY
736 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
738 struct gcc_target targetm
= TARGET_INITIALIZER
;
740 /* Obstack for minipool constant handling. */
741 static struct obstack minipool_obstack
;
742 static char * minipool_startobj
;
/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

/* Assembler output stream, defined in the driver.  */
extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;
753 /* The processor for which instructions should be scheduled. */
754 enum processor_type arm_tune
= arm_none
;
756 /* The current tuning set. */
757 const struct tune_params
*current_tune
;
759 /* Which floating point hardware to schedule for. */
762 /* Which floating point hardware to use. */
763 const struct arm_fpu_desc
*arm_fpu_desc
;
765 /* Used for Thumb call_via trampolines. */
766 rtx thumb_call_via_label
[14];
767 static int thumb_call_reg_needed
;
769 /* The bits in this mask specify which
770 instructions we are allowed to generate. */
771 arm_feature_set insn_flags
= ARM_FSET_EMPTY
;
773 /* The bits in this mask specify which instruction scheduling options should
775 arm_feature_set tune_flags
= ARM_FSET_EMPTY
;
777 /* The highest ARM architecture version supported by the
779 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
781 /* The following are used in the arm.md file as equivalents to bits
782 in the above two flag variables. */
784 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
787 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
790 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
793 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
796 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
799 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
802 /* Nonzero if this chip supports the ARM 6K extensions. */
805 /* Nonzero if this chip supports the ARM 6KZ extensions. */
808 /* Nonzero if instructions present in ARMv6-M can be used. */
811 /* Nonzero if this chip supports the ARM 7 extensions. */
/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */

/* Nonzero if instructions present in ARMv8 can be used.  */

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we should use Neon to handle 64-bits operations rather
   than core registers.  */
int prefer_neon_for_64bits = 0;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;
872 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
873 we must report the mode of the memory reference from
874 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
875 machine_mode output_memory_reference_mode
;
877 /* The register number to be used for the PIC offset register. */
878 unsigned arm_pic_register
= INVALID_REGNUM
;
880 enum arm_pcs arm_pcs_default
;
882 /* For an explanation of these variables, see final_prescan_insn below. */
884 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
885 enum arm_cond_code arm_current_cc
;
888 int arm_target_label
;
889 /* The number of conditionally executed insns, including the current insn. */
890 int arm_condexec_count
= 0;
891 /* A bitmask specifying the patterns for the IT block.
892 Zero means do not output an IT block before this insn. */
893 int arm_condexec_mask
= 0;
894 /* The number of bits used in arm_condexec_mask. */
895 int arm_condexec_masklen
= 0;
897 /* Nonzero if chip supports the ARMv8 CRC instructions. */
898 int arm_arch_crc
= 0;
900 /* Nonzero if the core has a very small, high-latency, multiply unit. */
901 int arm_m_profile_small_mul
= 0;
/* NOTE(review): the brace lines of both array initializers below were lost
   in extraction (the `{' after `=' and the closing `};' are missing).  */
903 /* The condition codes of the ARM, and the inverse function. */
904 static const char * const arm_condition_codes
[] =
906 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
907 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
910 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
911 int arm_regs_in_sequence
[] =
913 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
/* ARM_LSL_NAME picks the shift mnemonic spelling: "lsl" for unified asm,
   the legacy "asl" otherwise.  streq is string equality via strcmp.
   THUMB2_WORK_REGS is the low-register mask minus the hard frame pointer,
   SP, PC and the PIC register.  */
916 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
917 #define streq(string1, string2) (strcmp (string1, string2) == 0)
919 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
920 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
921 | (1 << PIC_OFFSET_TABLE_REGNUM)))
923 /* Initialization code. */
/* NOTE(review): the fields below (name/core/base_arch/flags/tune) appear to
   belong to the per-CPU descriptor struct (presumably `struct processors');
   its opening `struct ... {' line and closing `};' were lost in extraction
   -- confirm against the upstream file.  */
927 const char *const name
;
928 enum processor_type core
;
930 enum base_architecture base_arch
;
931 const arm_feature_set flags
;
932 const struct tune_params
*const tune
;
/* NOTE(review): ARM_PREFETCH_BENEFICIAL ends with a line continuation; its
   replacement body on the following line is missing from this excerpt.  */
936 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
937 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
944 /* arm generic vectorizer costs. */
/* NOTE(review): the closing `};' of this initializer was lost in
   extraction.  All unit costs are 1 except taken branches (3).  */
946 struct cpu_vec_costs arm_default_vec_cost
= {
947 1, /* scalar_stmt_cost. */
948 1, /* scalar load_cost. */
949 1, /* scalar_store_cost. */
950 1, /* vec_stmt_cost. */
951 1, /* vec_to_scalar_cost. */
952 1, /* scalar_to_vec_cost. */
953 1, /* vec_align_load_cost. */
954 1, /* vec_unalign_load_cost. */
955 1, /* vec_unalign_store_cost. */
956 1, /* vec_store_cost. */
957 3, /* cond_taken_branch_cost. */
958 1, /* cond_not_taken_branch_cost. */
961 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
962 #include "aarch-cost-tables.h"
/* Cortex-A9 insn cost table.  NOTE(review): numbered-listing excerpt; the
   initializer's brace lines and several field lines (non-contiguous source
   line numbers) were lost in extraction -- do not compile as-is.  */
966 const struct cpu_cost_table cortexa9_extra_costs
=
973 COSTS_N_INSNS (1), /* shift_reg. */
974 COSTS_N_INSNS (1), /* arith_shift. */
975 COSTS_N_INSNS (2), /* arith_shift_reg. */
977 COSTS_N_INSNS (1), /* log_shift_reg. */
978 COSTS_N_INSNS (1), /* extend. */
979 COSTS_N_INSNS (2), /* extend_arith. */
980 COSTS_N_INSNS (1), /* bfi. */
981 COSTS_N_INSNS (1), /* bfx. */
985 true /* non_exec_costs_exec. */
/* Integer multiply group (presumably SImode; section headers lost).  */
990 COSTS_N_INSNS (3), /* simple. */
991 COSTS_N_INSNS (3), /* flag_setting. */
992 COSTS_N_INSNS (2), /* extend. */
993 COSTS_N_INSNS (3), /* add. */
994 COSTS_N_INSNS (2), /* extend_add. */
995 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
999 0, /* simple (N/A). */
1000 0, /* flag_setting (N/A). */
1001 COSTS_N_INSNS (4), /* extend. */
1003 COSTS_N_INSNS (4), /* extend_add. */
/* Load/store group.  */
1009 COSTS_N_INSNS (2), /* load. */
1010 COSTS_N_INSNS (2), /* load_sign_extend. */
1011 COSTS_N_INSNS (2), /* ldrd. */
1012 COSTS_N_INSNS (2), /* ldm_1st. */
1013 1, /* ldm_regs_per_insn_1st. */
1014 2, /* ldm_regs_per_insn_subsequent. */
1015 COSTS_N_INSNS (5), /* loadf. */
1016 COSTS_N_INSNS (5), /* loadd. */
1017 COSTS_N_INSNS (1), /* load_unaligned. */
1018 COSTS_N_INSNS (2), /* store. */
1019 COSTS_N_INSNS (2), /* strd. */
1020 COSTS_N_INSNS (2), /* stm_1st. */
1021 1, /* stm_regs_per_insn_1st. */
1022 2, /* stm_regs_per_insn_subsequent. */
1023 COSTS_N_INSNS (1), /* storef. */
1024 COSTS_N_INSNS (1), /* stored. */
1025 COSTS_N_INSNS (1), /* store_unaligned. */
1026 COSTS_N_INSNS (1), /* loadv. */
1027 COSTS_N_INSNS (1) /* storev. */
/* FP group, single precision (presumed from the cheaper div/mult).  */
1032 COSTS_N_INSNS (14), /* div. */
1033 COSTS_N_INSNS (4), /* mult. */
1034 COSTS_N_INSNS (7), /* mult_addsub. */
1035 COSTS_N_INSNS (30), /* fma. */
1036 COSTS_N_INSNS (3), /* addsub. */
1037 COSTS_N_INSNS (1), /* fpconst. */
1038 COSTS_N_INSNS (1), /* neg. */
1039 COSTS_N_INSNS (3), /* compare. */
1040 COSTS_N_INSNS (3), /* widen. */
1041 COSTS_N_INSNS (3), /* narrow. */
1042 COSTS_N_INSNS (3), /* toint. */
1043 COSTS_N_INSNS (3), /* fromint. */
1044 COSTS_N_INSNS (3) /* roundint. */
/* FP group, double precision (presumed).  */
1048 COSTS_N_INSNS (24), /* div. */
1049 COSTS_N_INSNS (5), /* mult. */
1050 COSTS_N_INSNS (8), /* mult_addsub. */
1051 COSTS_N_INSNS (30), /* fma. */
1052 COSTS_N_INSNS (3), /* addsub. */
1053 COSTS_N_INSNS (1), /* fpconst. */
1054 COSTS_N_INSNS (1), /* neg. */
1055 COSTS_N_INSNS (3), /* compare. */
1056 COSTS_N_INSNS (3), /* widen. */
1057 COSTS_N_INSNS (3), /* narrow. */
1058 COSTS_N_INSNS (3), /* toint. */
1059 COSTS_N_INSNS (3), /* fromint. */
1060 COSTS_N_INSNS (3) /* roundint. */
1065 COSTS_N_INSNS (1) /* alu. */
/* Cortex-A8 insn cost table.  NOTE(review): numbered-listing excerpt; the
   initializer's brace lines and several field lines were lost in
   extraction -- do not compile as-is.  */
1069 const struct cpu_cost_table cortexa8_extra_costs
=
1075 COSTS_N_INSNS (1), /* shift. */
1077 COSTS_N_INSNS (1), /* arith_shift. */
1078 0, /* arith_shift_reg. */
1079 COSTS_N_INSNS (1), /* log_shift. */
1080 0, /* log_shift_reg. */
1082 0, /* extend_arith. */
1088 true /* non_exec_costs_exec. */
1093 COSTS_N_INSNS (1), /* simple. */
1094 COSTS_N_INSNS (1), /* flag_setting. */
1095 COSTS_N_INSNS (1), /* extend. */
1096 COSTS_N_INSNS (1), /* add. */
1097 COSTS_N_INSNS (1), /* extend_add. */
1098 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1102 0, /* simple (N/A). */
1103 0, /* flag_setting (N/A). */
1104 COSTS_N_INSNS (2), /* extend. */
1106 COSTS_N_INSNS (2), /* extend_add. */
1112 COSTS_N_INSNS (1), /* load. */
1113 COSTS_N_INSNS (1), /* load_sign_extend. */
1114 COSTS_N_INSNS (1), /* ldrd. */
1115 COSTS_N_INSNS (1), /* ldm_1st. */
1116 1, /* ldm_regs_per_insn_1st. */
1117 2, /* ldm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (1), /* loadf. */
1119 COSTS_N_INSNS (1), /* loadd. */
1120 COSTS_N_INSNS (1), /* load_unaligned. */
1121 COSTS_N_INSNS (1), /* store. */
1122 COSTS_N_INSNS (1), /* strd. */
1123 COSTS_N_INSNS (1), /* stm_1st. */
1124 1, /* stm_regs_per_insn_1st. */
1125 2, /* stm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (1), /* storef. */
1127 COSTS_N_INSNS (1), /* stored. */
1128 COSTS_N_INSNS (1), /* store_unaligned. */
1129 COSTS_N_INSNS (1), /* loadv. */
1130 COSTS_N_INSNS (1) /* storev. */
1135 COSTS_N_INSNS (36), /* div. */
1136 COSTS_N_INSNS (11), /* mult. */
1137 COSTS_N_INSNS (20), /* mult_addsub. */
1138 COSTS_N_INSNS (30), /* fma. */
1139 COSTS_N_INSNS (9), /* addsub. */
1140 COSTS_N_INSNS (3), /* fpconst. */
1141 COSTS_N_INSNS (3), /* neg. */
1142 COSTS_N_INSNS (6), /* compare. */
1143 COSTS_N_INSNS (4), /* widen. */
1144 COSTS_N_INSNS (4), /* narrow. */
1145 COSTS_N_INSNS (8), /* toint. */
1146 COSTS_N_INSNS (8), /* fromint. */
1147 COSTS_N_INSNS (8) /* roundint. */
1151 COSTS_N_INSNS (64), /* div. */
1152 COSTS_N_INSNS (16), /* mult. */
1153 COSTS_N_INSNS (25), /* mult_addsub. */
1154 COSTS_N_INSNS (30), /* fma. */
1155 COSTS_N_INSNS (9), /* addsub. */
1156 COSTS_N_INSNS (3), /* fpconst. */
1157 COSTS_N_INSNS (3), /* neg. */
1158 COSTS_N_INSNS (6), /* compare. */
1159 COSTS_N_INSNS (6), /* widen. */
1160 COSTS_N_INSNS (6), /* narrow. */
1161 COSTS_N_INSNS (8), /* toint. */
1162 COSTS_N_INSNS (8), /* fromint. */
1163 COSTS_N_INSNS (8) /* roundint. */
1168 COSTS_N_INSNS (1) /* alu. */
/* Cortex-A5 insn cost table.  NOTE(review): numbered-listing excerpt; the
   initializer's brace lines and some field lines were lost in extraction
   -- do not compile as-is.  */
1172 const struct cpu_cost_table cortexa5_extra_costs
=
1178 COSTS_N_INSNS (1), /* shift. */
1179 COSTS_N_INSNS (1), /* shift_reg. */
1180 COSTS_N_INSNS (1), /* arith_shift. */
1181 COSTS_N_INSNS (1), /* arith_shift_reg. */
1182 COSTS_N_INSNS (1), /* log_shift. */
1183 COSTS_N_INSNS (1), /* log_shift_reg. */
1184 COSTS_N_INSNS (1), /* extend. */
1185 COSTS_N_INSNS (1), /* extend_arith. */
1186 COSTS_N_INSNS (1), /* bfi. */
1187 COSTS_N_INSNS (1), /* bfx. */
1188 COSTS_N_INSNS (1), /* clz. */
1189 COSTS_N_INSNS (1), /* rev. */
1191 true /* non_exec_costs_exec. */
1198 COSTS_N_INSNS (1), /* flag_setting. */
1199 COSTS_N_INSNS (1), /* extend. */
1200 COSTS_N_INSNS (1), /* add. */
1201 COSTS_N_INSNS (1), /* extend_add. */
1202 COSTS_N_INSNS (7) /* idiv. */
1206 0, /* simple (N/A). */
1207 0, /* flag_setting (N/A). */
1208 COSTS_N_INSNS (1), /* extend. */
1210 COSTS_N_INSNS (2), /* extend_add. */
1216 COSTS_N_INSNS (1), /* load. */
1217 COSTS_N_INSNS (1), /* load_sign_extend. */
1218 COSTS_N_INSNS (6), /* ldrd. */
1219 COSTS_N_INSNS (1), /* ldm_1st. */
1220 1, /* ldm_regs_per_insn_1st. */
1221 2, /* ldm_regs_per_insn_subsequent. */
1222 COSTS_N_INSNS (2), /* loadf. */
1223 COSTS_N_INSNS (4), /* loadd. */
1224 COSTS_N_INSNS (1), /* load_unaligned. */
1225 COSTS_N_INSNS (1), /* store. */
1226 COSTS_N_INSNS (3), /* strd. */
1227 COSTS_N_INSNS (1), /* stm_1st. */
1228 1, /* stm_regs_per_insn_1st. */
1229 2, /* stm_regs_per_insn_subsequent. */
1230 COSTS_N_INSNS (2), /* storef. */
1231 COSTS_N_INSNS (2), /* stored. */
1232 COSTS_N_INSNS (1), /* store_unaligned. */
1233 COSTS_N_INSNS (1), /* loadv. */
1234 COSTS_N_INSNS (1) /* storev. */
1239 COSTS_N_INSNS (15), /* div. */
1240 COSTS_N_INSNS (3), /* mult. */
1241 COSTS_N_INSNS (7), /* mult_addsub. */
1242 COSTS_N_INSNS (7), /* fma. */
1243 COSTS_N_INSNS (3), /* addsub. */
1244 COSTS_N_INSNS (3), /* fpconst. */
1245 COSTS_N_INSNS (3), /* neg. */
1246 COSTS_N_INSNS (3), /* compare. */
1247 COSTS_N_INSNS (3), /* widen. */
1248 COSTS_N_INSNS (3), /* narrow. */
1249 COSTS_N_INSNS (3), /* toint. */
1250 COSTS_N_INSNS (3), /* fromint. */
1251 COSTS_N_INSNS (3) /* roundint. */
1255 COSTS_N_INSNS (30), /* div. */
1256 COSTS_N_INSNS (6), /* mult. */
1257 COSTS_N_INSNS (10), /* mult_addsub. */
1258 COSTS_N_INSNS (7), /* fma. */
1259 COSTS_N_INSNS (3), /* addsub. */
1260 COSTS_N_INSNS (3), /* fpconst. */
1261 COSTS_N_INSNS (3), /* neg. */
1262 COSTS_N_INSNS (3), /* compare. */
1263 COSTS_N_INSNS (3), /* widen. */
1264 COSTS_N_INSNS (3), /* narrow. */
1265 COSTS_N_INSNS (3), /* toint. */
1266 COSTS_N_INSNS (3), /* fromint. */
1267 COSTS_N_INSNS (3) /* roundint. */
1272 COSTS_N_INSNS (1) /* alu. */
/* Cortex-A7 insn cost table.  NOTE(review): numbered-listing excerpt; the
   initializer's brace lines and some field lines were lost in extraction
   -- do not compile as-is.  */
1277 const struct cpu_cost_table cortexa7_extra_costs
=
1283 COSTS_N_INSNS (1), /* shift. */
1284 COSTS_N_INSNS (1), /* shift_reg. */
1285 COSTS_N_INSNS (1), /* arith_shift. */
1286 COSTS_N_INSNS (1), /* arith_shift_reg. */
1287 COSTS_N_INSNS (1), /* log_shift. */
1288 COSTS_N_INSNS (1), /* log_shift_reg. */
1289 COSTS_N_INSNS (1), /* extend. */
1290 COSTS_N_INSNS (1), /* extend_arith. */
1291 COSTS_N_INSNS (1), /* bfi. */
1292 COSTS_N_INSNS (1), /* bfx. */
1293 COSTS_N_INSNS (1), /* clz. */
1294 COSTS_N_INSNS (1), /* rev. */
1296 true /* non_exec_costs_exec. */
1303 COSTS_N_INSNS (1), /* flag_setting. */
1304 COSTS_N_INSNS (1), /* extend. */
1305 COSTS_N_INSNS (1), /* add. */
1306 COSTS_N_INSNS (1), /* extend_add. */
1307 COSTS_N_INSNS (7) /* idiv. */
1311 0, /* simple (N/A). */
1312 0, /* flag_setting (N/A). */
1313 COSTS_N_INSNS (1), /* extend. */
1315 COSTS_N_INSNS (2), /* extend_add. */
1321 COSTS_N_INSNS (1), /* load. */
1322 COSTS_N_INSNS (1), /* load_sign_extend. */
1323 COSTS_N_INSNS (3), /* ldrd. */
1324 COSTS_N_INSNS (1), /* ldm_1st. */
1325 1, /* ldm_regs_per_insn_1st. */
1326 2, /* ldm_regs_per_insn_subsequent. */
1327 COSTS_N_INSNS (2), /* loadf. */
1328 COSTS_N_INSNS (2), /* loadd. */
1329 COSTS_N_INSNS (1), /* load_unaligned. */
1330 COSTS_N_INSNS (1), /* store. */
1331 COSTS_N_INSNS (3), /* strd. */
1332 COSTS_N_INSNS (1), /* stm_1st. */
1333 1, /* stm_regs_per_insn_1st. */
1334 2, /* stm_regs_per_insn_subsequent. */
1335 COSTS_N_INSNS (2), /* storef. */
1336 COSTS_N_INSNS (2), /* stored. */
1337 COSTS_N_INSNS (1), /* store_unaligned. */
1338 COSTS_N_INSNS (1), /* loadv. */
1339 COSTS_N_INSNS (1) /* storev. */
1344 COSTS_N_INSNS (15), /* div. */
1345 COSTS_N_INSNS (3), /* mult. */
1346 COSTS_N_INSNS (7), /* mult_addsub. */
1347 COSTS_N_INSNS (7), /* fma. */
1348 COSTS_N_INSNS (3), /* addsub. */
1349 COSTS_N_INSNS (3), /* fpconst. */
1350 COSTS_N_INSNS (3), /* neg. */
1351 COSTS_N_INSNS (3), /* compare. */
1352 COSTS_N_INSNS (3), /* widen. */
1353 COSTS_N_INSNS (3), /* narrow. */
1354 COSTS_N_INSNS (3), /* toint. */
1355 COSTS_N_INSNS (3), /* fromint. */
1356 COSTS_N_INSNS (3) /* roundint. */
1360 COSTS_N_INSNS (30), /* div. */
1361 COSTS_N_INSNS (6), /* mult. */
1362 COSTS_N_INSNS (10), /* mult_addsub. */
1363 COSTS_N_INSNS (7), /* fma. */
1364 COSTS_N_INSNS (3), /* addsub. */
1365 COSTS_N_INSNS (3), /* fpconst. */
1366 COSTS_N_INSNS (3), /* neg. */
1367 COSTS_N_INSNS (3), /* compare. */
1368 COSTS_N_INSNS (3), /* widen. */
1369 COSTS_N_INSNS (3), /* narrow. */
1370 COSTS_N_INSNS (3), /* toint. */
1371 COSTS_N_INSNS (3), /* fromint. */
1372 COSTS_N_INSNS (3) /* roundint. */
1377 COSTS_N_INSNS (1) /* alu. */
/* Cortex-A12 insn cost table.  NOTE(review): numbered-listing excerpt; the
   initializer's brace lines and several field lines were lost in
   extraction -- do not compile as-is.  */
1381 const struct cpu_cost_table cortexa12_extra_costs
=
1388 COSTS_N_INSNS (1), /* shift_reg. */
1389 COSTS_N_INSNS (1), /* arith_shift. */
1390 COSTS_N_INSNS (1), /* arith_shift_reg. */
1391 COSTS_N_INSNS (1), /* log_shift. */
1392 COSTS_N_INSNS (1), /* log_shift_reg. */
1394 COSTS_N_INSNS (1), /* extend_arith. */
1396 COSTS_N_INSNS (1), /* bfx. */
1397 COSTS_N_INSNS (1), /* clz. */
1398 COSTS_N_INSNS (1), /* rev. */
1400 true /* non_exec_costs_exec. */
1405 COSTS_N_INSNS (2), /* simple. */
1406 COSTS_N_INSNS (3), /* flag_setting. */
1407 COSTS_N_INSNS (2), /* extend. */
1408 COSTS_N_INSNS (3), /* add. */
1409 COSTS_N_INSNS (2), /* extend_add. */
1410 COSTS_N_INSNS (18) /* idiv. */
1414 0, /* simple (N/A). */
1415 0, /* flag_setting (N/A). */
1416 COSTS_N_INSNS (3), /* extend. */
1418 COSTS_N_INSNS (3), /* extend_add. */
1424 COSTS_N_INSNS (3), /* load. */
1425 COSTS_N_INSNS (3), /* load_sign_extend. */
1426 COSTS_N_INSNS (3), /* ldrd. */
1427 COSTS_N_INSNS (3), /* ldm_1st. */
1428 1, /* ldm_regs_per_insn_1st. */
1429 2, /* ldm_regs_per_insn_subsequent. */
1430 COSTS_N_INSNS (3), /* loadf. */
1431 COSTS_N_INSNS (3), /* loadd. */
1432 0, /* load_unaligned. */
1436 1, /* stm_regs_per_insn_1st. */
1437 2, /* stm_regs_per_insn_subsequent. */
1438 COSTS_N_INSNS (2), /* storef. */
1439 COSTS_N_INSNS (2), /* stored. */
1440 0, /* store_unaligned. */
1441 COSTS_N_INSNS (1), /* loadv. */
1442 COSTS_N_INSNS (1) /* storev. */
1447 COSTS_N_INSNS (17), /* div. */
1448 COSTS_N_INSNS (4), /* mult. */
1449 COSTS_N_INSNS (8), /* mult_addsub. */
1450 COSTS_N_INSNS (8), /* fma. */
1451 COSTS_N_INSNS (4), /* addsub. */
1452 COSTS_N_INSNS (2), /* fpconst. */
1453 COSTS_N_INSNS (2), /* neg. */
1454 COSTS_N_INSNS (2), /* compare. */
1455 COSTS_N_INSNS (4), /* widen. */
1456 COSTS_N_INSNS (4), /* narrow. */
1457 COSTS_N_INSNS (4), /* toint. */
1458 COSTS_N_INSNS (4), /* fromint. */
1459 COSTS_N_INSNS (4) /* roundint. */
1463 COSTS_N_INSNS (31), /* div. */
1464 COSTS_N_INSNS (4), /* mult. */
1465 COSTS_N_INSNS (8), /* mult_addsub. */
1466 COSTS_N_INSNS (8), /* fma. */
1467 COSTS_N_INSNS (4), /* addsub. */
1468 COSTS_N_INSNS (2), /* fpconst. */
1469 COSTS_N_INSNS (2), /* neg. */
1470 COSTS_N_INSNS (2), /* compare. */
1471 COSTS_N_INSNS (4), /* widen. */
1472 COSTS_N_INSNS (4), /* narrow. */
1473 COSTS_N_INSNS (4), /* toint. */
1474 COSTS_N_INSNS (4), /* fromint. */
1475 COSTS_N_INSNS (4) /* roundint. */
1480 COSTS_N_INSNS (1) /* alu. */
/* Cortex-A15 insn cost table.  NOTE(review): numbered-listing excerpt; the
   initializer's brace lines and several field lines were lost in
   extraction -- do not compile as-is.  */
1484 const struct cpu_cost_table cortexa15_extra_costs
=
1492 COSTS_N_INSNS (1), /* arith_shift. */
1493 COSTS_N_INSNS (1), /* arith_shift_reg. */
1494 COSTS_N_INSNS (1), /* log_shift. */
1495 COSTS_N_INSNS (1), /* log_shift_reg. */
1497 COSTS_N_INSNS (1), /* extend_arith. */
1498 COSTS_N_INSNS (1), /* bfi. */
1503 true /* non_exec_costs_exec. */
1508 COSTS_N_INSNS (2), /* simple. */
1509 COSTS_N_INSNS (3), /* flag_setting. */
1510 COSTS_N_INSNS (2), /* extend. */
1511 COSTS_N_INSNS (2), /* add. */
1512 COSTS_N_INSNS (2), /* extend_add. */
1513 COSTS_N_INSNS (18) /* idiv. */
1517 0, /* simple (N/A). */
1518 0, /* flag_setting (N/A). */
1519 COSTS_N_INSNS (3), /* extend. */
1521 COSTS_N_INSNS (3), /* extend_add. */
1527 COSTS_N_INSNS (3), /* load. */
1528 COSTS_N_INSNS (3), /* load_sign_extend. */
1529 COSTS_N_INSNS (3), /* ldrd. */
1530 COSTS_N_INSNS (4), /* ldm_1st. */
1531 1, /* ldm_regs_per_insn_1st. */
1532 2, /* ldm_regs_per_insn_subsequent. */
1533 COSTS_N_INSNS (4), /* loadf. */
1534 COSTS_N_INSNS (4), /* loadd. */
1535 0, /* load_unaligned. */
1538 COSTS_N_INSNS (1), /* stm_1st. */
1539 1, /* stm_regs_per_insn_1st. */
1540 2, /* stm_regs_per_insn_subsequent. */
1543 0, /* store_unaligned. */
1544 COSTS_N_INSNS (1), /* loadv. */
1545 COSTS_N_INSNS (1) /* storev. */
1550 COSTS_N_INSNS (17), /* div. */
1551 COSTS_N_INSNS (4), /* mult. */
1552 COSTS_N_INSNS (8), /* mult_addsub. */
1553 COSTS_N_INSNS (8), /* fma. */
1554 COSTS_N_INSNS (4), /* addsub. */
1555 COSTS_N_INSNS (2), /* fpconst. */
1556 COSTS_N_INSNS (2), /* neg. */
1557 COSTS_N_INSNS (5), /* compare. */
1558 COSTS_N_INSNS (4), /* widen. */
1559 COSTS_N_INSNS (4), /* narrow. */
1560 COSTS_N_INSNS (4), /* toint. */
1561 COSTS_N_INSNS (4), /* fromint. */
1562 COSTS_N_INSNS (4) /* roundint. */
1566 COSTS_N_INSNS (31), /* div. */
1567 COSTS_N_INSNS (4), /* mult. */
1568 COSTS_N_INSNS (8), /* mult_addsub. */
1569 COSTS_N_INSNS (8), /* fma. */
1570 COSTS_N_INSNS (4), /* addsub. */
1571 COSTS_N_INSNS (2), /* fpconst. */
1572 COSTS_N_INSNS (2), /* neg. */
1573 COSTS_N_INSNS (2), /* compare. */
1574 COSTS_N_INSNS (4), /* widen. */
1575 COSTS_N_INSNS (4), /* narrow. */
1576 COSTS_N_INSNS (4), /* toint. */
1577 COSTS_N_INSNS (4), /* fromint. */
1578 COSTS_N_INSNS (4) /* roundint. */
1583 COSTS_N_INSNS (1) /* alu. */
/* ARMv7-M insn cost table.  NOTE(review): numbered-listing excerpt; the
   initializer's brace lines and many field lines (large gaps in the source
   line numbers, e.g. 1657->1669) were lost in extraction -- do not compile
   as-is.  Note non_exec_costs_exec is false here, unlike the A-profile
   tables above.  */
1587 const struct cpu_cost_table v7m_extra_costs
=
1595 0, /* arith_shift. */
1596 COSTS_N_INSNS (1), /* arith_shift_reg. */
1598 COSTS_N_INSNS (1), /* log_shift_reg. */
1600 COSTS_N_INSNS (1), /* extend_arith. */
1605 COSTS_N_INSNS (1), /* non_exec. */
1606 false /* non_exec_costs_exec. */
1611 COSTS_N_INSNS (1), /* simple. */
1612 COSTS_N_INSNS (1), /* flag_setting. */
1613 COSTS_N_INSNS (2), /* extend. */
1614 COSTS_N_INSNS (1), /* add. */
1615 COSTS_N_INSNS (3), /* extend_add. */
1616 COSTS_N_INSNS (8) /* idiv. */
1620 0, /* simple (N/A). */
1621 0, /* flag_setting (N/A). */
1622 COSTS_N_INSNS (2), /* extend. */
1624 COSTS_N_INSNS (3), /* extend_add. */
1630 COSTS_N_INSNS (2), /* load. */
1631 0, /* load_sign_extend. */
1632 COSTS_N_INSNS (3), /* ldrd. */
1633 COSTS_N_INSNS (2), /* ldm_1st. */
1634 1, /* ldm_regs_per_insn_1st. */
1635 1, /* ldm_regs_per_insn_subsequent. */
1636 COSTS_N_INSNS (2), /* loadf. */
1637 COSTS_N_INSNS (3), /* loadd. */
1638 COSTS_N_INSNS (1), /* load_unaligned. */
1639 COSTS_N_INSNS (2), /* store. */
1640 COSTS_N_INSNS (3), /* strd. */
1641 COSTS_N_INSNS (2), /* stm_1st. */
1642 1, /* stm_regs_per_insn_1st. */
1643 1, /* stm_regs_per_insn_subsequent. */
1644 COSTS_N_INSNS (2), /* storef. */
1645 COSTS_N_INSNS (3), /* stored. */
1646 COSTS_N_INSNS (1), /* store_unaligned. */
1647 COSTS_N_INSNS (1), /* loadv. */
1648 COSTS_N_INSNS (1) /* storev. */
1653 COSTS_N_INSNS (7), /* div. */
1654 COSTS_N_INSNS (2), /* mult. */
1655 COSTS_N_INSNS (5), /* mult_addsub. */
1656 COSTS_N_INSNS (3), /* fma. */
1657 COSTS_N_INSNS (1), /* addsub. */
1669 COSTS_N_INSNS (15), /* div. */
1670 COSTS_N_INSNS (5), /* mult. */
1671 COSTS_N_INSNS (7), /* mult_addsub. */
1672 COSTS_N_INSNS (7), /* fma. */
1673 COSTS_N_INSNS (3), /* addsub. */
1686 COSTS_N_INSNS (1) /* alu. */
/* Tuning for cores with a slow multiplier.  NOTE(review): numbered-listing
   excerpt; the `{'/`};' delimiter lines of both initializers below were
   lost in extraction -- do not compile as-is.  */
1690 const struct tune_params arm_slowmul_tune
=
1692 arm_slowmul_rtx_costs
,
1693 NULL
, /* Insn extra costs. */
1694 NULL
, /* Sched adj cost. */
1695 arm_default_branch_cost
,
1696 &arm_default_vec_cost
,
1697 3, /* Constant limit. */
1698 5, /* Max cond insns. */
1699 8, /* Memset max inline. */
1700 1, /* Issue rate. */
1701 ARM_PREFETCH_NOT_BENEFICIAL
,
1702 tune_params::PREF_CONST_POOL_TRUE
,
1703 tune_params::PREF_LDRD_FALSE
,
1704 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1705 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1706 tune_params::DISPARAGE_FLAGS_NEITHER
,
1707 tune_params::PREF_NEON_64_FALSE
,
1708 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1709 tune_params::FUSE_NOTHING
,
1710 tune_params::SCHED_AUTOPREF_OFF
/* Tuning for cores with a fast multiplier (constant limit 1 vs 3 above).  */
1713 const struct tune_params arm_fastmul_tune
=
1715 arm_fastmul_rtx_costs
,
1716 NULL
, /* Insn extra costs. */
1717 NULL
, /* Sched adj cost. */
1718 arm_default_branch_cost
,
1719 &arm_default_vec_cost
,
1720 1, /* Constant limit. */
1721 5, /* Max cond insns. */
1722 8, /* Memset max inline. */
1723 1, /* Issue rate. */
1724 ARM_PREFETCH_NOT_BENEFICIAL
,
1725 tune_params::PREF_CONST_POOL_TRUE
,
1726 tune_params::PREF_LDRD_FALSE
,
1727 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1728 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1729 tune_params::DISPARAGE_FLAGS_NEITHER
,
1730 tune_params::PREF_NEON_64_FALSE
,
1731 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1732 tune_params::FUSE_NOTHING
,
1733 tune_params::SCHED_AUTOPREF_OFF
/* NOTE(review): numbered-listing excerpt; the `{'/`};' delimiter lines of
   both initializers below were lost in extraction -- do not compile
   as-is.  */
1736 /* StrongARM has early execution of branches, so a sequence that is worth
1737 skipping is shorter. Set max_insns_skipped to a lower value. */
1739 const struct tune_params arm_strongarm_tune
=
1741 arm_fastmul_rtx_costs
,
1742 NULL
, /* Insn extra costs. */
1743 NULL
, /* Sched adj cost. */
1744 arm_default_branch_cost
,
1745 &arm_default_vec_cost
,
1746 1, /* Constant limit. */
1747 3, /* Max cond insns. */
1748 8, /* Memset max inline. */
1749 1, /* Issue rate. */
1750 ARM_PREFETCH_NOT_BENEFICIAL
,
1751 tune_params::PREF_CONST_POOL_TRUE
,
1752 tune_params::PREF_LDRD_FALSE
,
1753 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1754 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1755 tune_params::DISPARAGE_FLAGS_NEITHER
,
1756 tune_params::PREF_NEON_64_FALSE
,
1757 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1758 tune_params::FUSE_NOTHING
,
1759 tune_params::SCHED_AUTOPREF_OFF
/* XScale tuning: uses its own rtx costs and a sched-adjust-cost hook.  */
1762 const struct tune_params arm_xscale_tune
=
1764 arm_xscale_rtx_costs
,
1765 NULL
, /* Insn extra costs. */
1766 xscale_sched_adjust_cost
,
1767 arm_default_branch_cost
,
1768 &arm_default_vec_cost
,
1769 2, /* Constant limit. */
1770 3, /* Max cond insns. */
1771 8, /* Memset max inline. */
1772 1, /* Issue rate. */
1773 ARM_PREFETCH_NOT_BENEFICIAL
,
1774 tune_params::PREF_CONST_POOL_TRUE
,
1775 tune_params::PREF_LDRD_FALSE
,
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1777 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1778 tune_params::DISPARAGE_FLAGS_NEITHER
,
1779 tune_params::PREF_NEON_64_FALSE
,
1780 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1781 tune_params::FUSE_NOTHING
,
1782 tune_params::SCHED_AUTOPREF_OFF
/* NOTE(review): numbered-listing excerpt; the `{'/`};' delimiter lines of
   both initializers below were lost, and the first field (the rtx-costs
   function) of each appears to have been dropped as well -- each starts
   directly at the "Insn extra costs" slot.  Do not compile as-is.  */
1785 const struct tune_params arm_9e_tune
=
1788 NULL
, /* Insn extra costs. */
1789 NULL
, /* Sched adj cost. */
1790 arm_default_branch_cost
,
1791 &arm_default_vec_cost
,
1792 1, /* Constant limit. */
1793 5, /* Max cond insns. */
1794 8, /* Memset max inline. */
1795 1, /* Issue rate. */
1796 ARM_PREFETCH_NOT_BENEFICIAL
,
1797 tune_params::PREF_CONST_POOL_TRUE
,
1798 tune_params::PREF_LDRD_FALSE
,
1799 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1800 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1801 tune_params::DISPARAGE_FLAGS_NEITHER
,
1802 tune_params::PREF_NEON_64_FALSE
,
1803 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1804 tune_params::FUSE_NOTHING
,
1805 tune_params::SCHED_AUTOPREF_OFF
/* Marvell PJ4: dual-issue (issue rate 2), otherwise like arm_9e_tune.  */
1808 const struct tune_params arm_marvell_pj4_tune
=
1811 NULL
, /* Insn extra costs. */
1812 NULL
, /* Sched adj cost. */
1813 arm_default_branch_cost
,
1814 &arm_default_vec_cost
,
1815 1, /* Constant limit. */
1816 5, /* Max cond insns. */
1817 8, /* Memset max inline. */
1818 2, /* Issue rate. */
1819 ARM_PREFETCH_NOT_BENEFICIAL
,
1820 tune_params::PREF_CONST_POOL_TRUE
,
1821 tune_params::PREF_LDRD_FALSE
,
1822 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1824 tune_params::DISPARAGE_FLAGS_NEITHER
,
1825 tune_params::PREF_NEON_64_FALSE
,
1826 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1827 tune_params::FUSE_NOTHING
,
1828 tune_params::SCHED_AUTOPREF_OFF
/* NOTE(review): numbered-listing excerpt; the `{'/`};' delimiter lines of
   both initializers below were lost, and each appears to be missing its
   leading rtx-costs field.  Do not compile as-is.  Both prefer not to use
   the constant pool (PREF_CONST_POOL_FALSE), unlike the legacy tunes
   above.  */
1831 const struct tune_params arm_v6t2_tune
=
1834 NULL
, /* Insn extra costs. */
1835 NULL
, /* Sched adj cost. */
1836 arm_default_branch_cost
,
1837 &arm_default_vec_cost
,
1838 1, /* Constant limit. */
1839 5, /* Max cond insns. */
1840 8, /* Memset max inline. */
1841 1, /* Issue rate. */
1842 ARM_PREFETCH_NOT_BENEFICIAL
,
1843 tune_params::PREF_CONST_POOL_FALSE
,
1844 tune_params::PREF_LDRD_FALSE
,
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1847 tune_params::DISPARAGE_FLAGS_NEITHER
,
1848 tune_params::PREF_NEON_64_FALSE
,
1849 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1850 tune_params::FUSE_NOTHING
,
1851 tune_params::SCHED_AUTOPREF_OFF
1855 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1856 const struct tune_params arm_cortex_tune
=
1859 &generic_extra_costs
,
1860 NULL
, /* Sched adj cost. */
1861 arm_default_branch_cost
,
1862 &arm_default_vec_cost
,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 2, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL
,
1868 tune_params::PREF_CONST_POOL_FALSE
,
1869 tune_params::PREF_LDRD_FALSE
,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER
,
1873 tune_params::PREF_NEON_64_FALSE
,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1875 tune_params::FUSE_NOTHING
,
1876 tune_params::SCHED_AUTOPREF_OFF
/* NOTE(review): numbered-listing excerpt; the `{'/`};' delimiter lines of
   both initializers below were lost in extraction -- do not compile as-is.
   Both A8 and A7 tunes enable Neon string operations
   (PREF_NEON_STRINGOPS_TRUE).  */
1879 const struct tune_params arm_cortex_a8_tune
=
1882 &cortexa8_extra_costs
,
1883 NULL
, /* Sched adj cost. */
1884 arm_default_branch_cost
,
1885 &arm_default_vec_cost
,
1886 1, /* Constant limit. */
1887 5, /* Max cond insns. */
1888 8, /* Memset max inline. */
1889 2, /* Issue rate. */
1890 ARM_PREFETCH_NOT_BENEFICIAL
,
1891 tune_params::PREF_CONST_POOL_FALSE
,
1892 tune_params::PREF_LDRD_FALSE
,
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1894 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1895 tune_params::DISPARAGE_FLAGS_NEITHER
,
1896 tune_params::PREF_NEON_64_FALSE
,
1897 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1898 tune_params::FUSE_NOTHING
,
1899 tune_params::SCHED_AUTOPREF_OFF
1902 const struct tune_params arm_cortex_a7_tune
=
1905 &cortexa7_extra_costs
,
1906 NULL
, /* Sched adj cost. */
1907 arm_default_branch_cost
,
1908 &arm_default_vec_cost
,
1909 1, /* Constant limit. */
1910 5, /* Max cond insns. */
1911 8, /* Memset max inline. */
1912 2, /* Issue rate. */
1913 ARM_PREFETCH_NOT_BENEFICIAL
,
1914 tune_params::PREF_CONST_POOL_FALSE
,
1915 tune_params::PREF_LDRD_FALSE
,
1916 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1917 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1918 tune_params::DISPARAGE_FLAGS_NEITHER
,
1919 tune_params::PREF_NEON_64_FALSE
,
1920 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1921 tune_params::FUSE_NOTHING
,
1922 tune_params::SCHED_AUTOPREF_OFF
/* NOTE(review): numbered-listing excerpt; the `{'/`};' delimiter lines of
   both initializers below were lost in extraction -- do not compile as-is.
   A15 is triple-issue with full sched autoprefetch; A53 fuses MOVW/MOVT.  */
1925 const struct tune_params arm_cortex_a15_tune
=
1928 &cortexa15_extra_costs
,
1929 NULL
, /* Sched adj cost. */
1930 arm_default_branch_cost
,
1931 &arm_default_vec_cost
,
1932 1, /* Constant limit. */
1933 2, /* Max cond insns. */
1934 8, /* Memset max inline. */
1935 3, /* Issue rate. */
1936 ARM_PREFETCH_NOT_BENEFICIAL
,
1937 tune_params::PREF_CONST_POOL_FALSE
,
1938 tune_params::PREF_LDRD_TRUE
,
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1940 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1941 tune_params::DISPARAGE_FLAGS_ALL
,
1942 tune_params::PREF_NEON_64_FALSE
,
1943 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1944 tune_params::FUSE_NOTHING
,
1945 tune_params::SCHED_AUTOPREF_FULL
1948 const struct tune_params arm_cortex_a53_tune
=
1951 &cortexa53_extra_costs
,
1952 NULL
, /* Sched adj cost. */
1953 arm_default_branch_cost
,
1954 &arm_default_vec_cost
,
1955 1, /* Constant limit. */
1956 5, /* Max cond insns. */
1957 8, /* Memset max inline. */
1958 2, /* Issue rate. */
1959 ARM_PREFETCH_NOT_BENEFICIAL
,
1960 tune_params::PREF_CONST_POOL_FALSE
,
1961 tune_params::PREF_LDRD_FALSE
,
1962 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1963 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1964 tune_params::DISPARAGE_FLAGS_NEITHER
,
1965 tune_params::PREF_NEON_64_FALSE
,
1966 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1967 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
1968 tune_params::SCHED_AUTOPREF_OFF
/* NOTE(review): numbered-listing excerpt; the `{'/`};' delimiter lines of
   both initializers below were lost in extraction -- do not compile
   as-is.  */
1971 const struct tune_params arm_cortex_a57_tune
=
1974 &cortexa57_extra_costs
,
1975 NULL
, /* Sched adj cost. */
1976 arm_default_branch_cost
,
1977 &arm_default_vec_cost
,
1978 1, /* Constant limit. */
1979 2, /* Max cond insns. */
1980 8, /* Memset max inline. */
1981 3, /* Issue rate. */
1982 ARM_PREFETCH_NOT_BENEFICIAL
,
1983 tune_params::PREF_CONST_POOL_FALSE
,
1984 tune_params::PREF_LDRD_TRUE
,
1985 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1986 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1987 tune_params::DISPARAGE_FLAGS_ALL
,
1988 tune_params::PREF_NEON_64_FALSE
,
1989 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1990 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
1991 tune_params::SCHED_AUTOPREF_FULL
/* X-Gene 1: quad-issue, larger memset inline limit (32).  */
1994 const struct tune_params arm_xgene1_tune
=
1997 &xgene1_extra_costs
,
1998 NULL
, /* Sched adj cost. */
1999 arm_default_branch_cost
,
2000 &arm_default_vec_cost
,
2001 1, /* Constant limit. */
2002 2, /* Max cond insns. */
2003 32, /* Memset max inline. */
2004 4, /* Issue rate. */
2005 ARM_PREFETCH_NOT_BENEFICIAL
,
2006 tune_params::PREF_CONST_POOL_FALSE
,
2007 tune_params::PREF_LDRD_TRUE
,
2008 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2009 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2010 tune_params::DISPARAGE_FLAGS_ALL
,
2011 tune_params::PREF_NEON_64_FALSE
,
2012 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2013 tune_params::FUSE_NOTHING
,
2014 tune_params::SCHED_AUTOPREF_OFF
/* NOTE(review): numbered-listing excerpt; the `{'/`};' delimiter lines of
   both initializers below were lost in extraction -- do not compile
   as-is.  */
2017 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2018 less appealing. Set max_insns_skipped to a low value. */
2020 const struct tune_params arm_cortex_a5_tune
=
2023 &cortexa5_extra_costs
,
2024 NULL
, /* Sched adj cost. */
2025 arm_cortex_a5_branch_cost
,
2026 &arm_default_vec_cost
,
2027 1, /* Constant limit. */
2028 1, /* Max cond insns. */
2029 8, /* Memset max inline. */
2030 2, /* Issue rate. */
2031 ARM_PREFETCH_NOT_BENEFICIAL
,
2032 tune_params::PREF_CONST_POOL_FALSE
,
2033 tune_params::PREF_LDRD_FALSE
,
2034 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2035 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2036 tune_params::DISPARAGE_FLAGS_NEITHER
,
2037 tune_params::PREF_NEON_64_FALSE
,
2038 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2039 tune_params::FUSE_NOTHING
,
2040 tune_params::SCHED_AUTOPREF_OFF
/* Cortex-A9: uses a sched-adjust-cost hook and is the only tune here that
   marks prefetch as beneficial (4 slots, 32K L1, 32-byte lines).  */
2043 const struct tune_params arm_cortex_a9_tune
=
2046 &cortexa9_extra_costs
,
2047 cortex_a9_sched_adjust_cost
,
2048 arm_default_branch_cost
,
2049 &arm_default_vec_cost
,
2050 1, /* Constant limit. */
2051 5, /* Max cond insns. */
2052 8, /* Memset max inline. */
2053 2, /* Issue rate. */
2054 ARM_PREFETCH_BENEFICIAL(4,32,32),
2055 tune_params::PREF_CONST_POOL_FALSE
,
2056 tune_params::PREF_LDRD_FALSE
,
2057 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2058 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2059 tune_params::DISPARAGE_FLAGS_NEITHER
,
2060 tune_params::PREF_NEON_64_FALSE
,
2061 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2062 tune_params::FUSE_NOTHING
,
2063 tune_params::SCHED_AUTOPREF_OFF
2066 const struct tune_params arm_cortex_a12_tune
=
2069 &cortexa12_extra_costs
,
2070 NULL
, /* Sched adj cost. */
2071 arm_default_branch_cost
,
2072 &arm_default_vec_cost
, /* Vectorizer costs. */
2073 1, /* Constant limit. */
2074 2, /* Max cond insns. */
2075 8, /* Memset max inline. */
2076 2, /* Issue rate. */
2077 ARM_PREFETCH_NOT_BENEFICIAL
,
2078 tune_params::PREF_CONST_POOL_FALSE
,
2079 tune_params::PREF_LDRD_TRUE
,
2080 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2081 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2082 tune_params::DISPARAGE_FLAGS_ALL
,
2083 tune_params::PREF_NEON_64_FALSE
,
2084 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2085 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2086 tune_params::SCHED_AUTOPREF_OFF
2089 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2090 cycle to execute each. An LDR from the constant pool also takes two cycles
2091 to execute, but mildly increases pipelining opportunity (consecutive
2092 loads/stores can be pipelined together, saving one cycle), and may also
2093 improve icache utilisation. Hence we prefer the constant pool for such
2096 const struct tune_params arm_v7m_tune
=
2100 NULL
, /* Sched adj cost. */
2101 arm_cortex_m_branch_cost
,
2102 &arm_default_vec_cost
,
2103 1, /* Constant limit. */
2104 2, /* Max cond insns. */
2105 8, /* Memset max inline. */
2106 1, /* Issue rate. */
2107 ARM_PREFETCH_NOT_BENEFICIAL
,
2108 tune_params::PREF_CONST_POOL_TRUE
,
2109 tune_params::PREF_LDRD_FALSE
,
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2112 tune_params::DISPARAGE_FLAGS_NEITHER
,
2113 tune_params::PREF_NEON_64_FALSE
,
2114 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2115 tune_params::FUSE_NOTHING
,
2116 tune_params::SCHED_AUTOPREF_OFF
2119 /* Cortex-M7 tuning. */
2121 const struct tune_params arm_cortex_m7_tune
=
2125 NULL
, /* Sched adj cost. */
2126 arm_cortex_m7_branch_cost
,
2127 &arm_default_vec_cost
,
2128 0, /* Constant limit. */
2129 1, /* Max cond insns. */
2130 8, /* Memset max inline. */
2131 2, /* Issue rate. */
2132 ARM_PREFETCH_NOT_BENEFICIAL
,
2133 tune_params::PREF_CONST_POOL_TRUE
,
2134 tune_params::PREF_LDRD_FALSE
,
2135 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2137 tune_params::DISPARAGE_FLAGS_NEITHER
,
2138 tune_params::PREF_NEON_64_FALSE
,
2139 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2140 tune_params::FUSE_NOTHING
,
2141 tune_params::SCHED_AUTOPREF_OFF
2144 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2145 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2146 const struct tune_params arm_v6m_tune
=
2149 NULL
, /* Insn extra costs. */
2150 NULL
, /* Sched adj cost. */
2151 arm_default_branch_cost
,
2152 &arm_default_vec_cost
, /* Vectorizer costs. */
2153 1, /* Constant limit. */
2154 5, /* Max cond insns. */
2155 8, /* Memset max inline. */
2156 1, /* Issue rate. */
2157 ARM_PREFETCH_NOT_BENEFICIAL
,
2158 tune_params::PREF_CONST_POOL_FALSE
,
2159 tune_params::PREF_LDRD_FALSE
,
2160 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2162 tune_params::DISPARAGE_FLAGS_NEITHER
,
2163 tune_params::PREF_NEON_64_FALSE
,
2164 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2165 tune_params::FUSE_NOTHING
,
2166 tune_params::SCHED_AUTOPREF_OFF
2169 const struct tune_params arm_fa726te_tune
=
2172 NULL
, /* Insn extra costs. */
2173 fa726te_sched_adjust_cost
,
2174 arm_default_branch_cost
,
2175 &arm_default_vec_cost
,
2176 1, /* Constant limit. */
2177 5, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL
,
2181 tune_params::PREF_CONST_POOL_TRUE
,
2182 tune_params::PREF_LDRD_FALSE
,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_NEITHER
,
2186 tune_params::PREF_NEON_64_FALSE
,
2187 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2188 tune_params::FUSE_NOTHING
,
2189 tune_params::SCHED_AUTOPREF_OFF
2193 /* Not all of these give usefully different compilation alternatives,
2194 but there is no simple way of generalizing them. */
2195 static const struct processors all_cores
[] =
2198 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2199 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2200 FLAGS, &arm_##COSTS##_tune},
2201 #include "arm-cores.def"
2203 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, ARM_FSET_EMPTY
, NULL
}
2206 static const struct processors all_architectures
[] =
2208 /* ARM Architectures */
2209 /* We don't specify tuning costs here as it will be figured out
2212 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2213 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2214 #include "arm-arches.def"
2216 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, ARM_FSET_EMPTY
, NULL
}
2220 /* These are populated as commandline arguments are processed, or NULL
2221 if not specified. */
2222 static const struct processors
*arm_selected_arch
;
2223 static const struct processors
*arm_selected_cpu
;
2224 static const struct processors
*arm_selected_tune
;
2226 /* The name of the preprocessor macro to define for this architecture. */
2228 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
2230 /* Available values for -mfpu=. */
2232 static const struct arm_fpu_desc all_fpus
[] =
2234 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \
2235 { NAME, MODEL, REV, VFP_REGS, FEATURES },
2236 #include "arm-fpus.def"
2241 /* Supported TLS relocations. */
2249 TLS_DESCSEQ
/* GNU scheme */
2252 /* The maximum number of insns to be used when loading a constant. */
/* NOTE(review): this chunk appears to be a lossy extraction of
   gcc/config/arm/arm.c -- return types, braces and some statements are
   missing throughout; comments here document intent only.  */
/* When optimizing for size the limit is pinned to 1 insn; otherwise the
   limit comes from the constant_limit field of the currently selected
   per-CPU tuning structure (current_tune).  */
2254 arm_constant_limit (bool size_p
)
2256 return size_p
? 1 : current_tune
->constant_limit
;
2259 /* Emit an insn that's a simple single-set. Both the operands must be known
/* Builds a (set X Y) RTL pattern via gen_rtx_SET and emits it with
   emit_insn, returning the emitted rtx_insn.  Callers must ensure both
   operands are valid for a single SET (the comment above is truncated
   by the extraction).  */
2261 inline static rtx_insn
*
2262 emit_set_insn (rtx x
, rtx y
)
2264 return emit_insn (gen_rtx_SET (x
, y
));
2267 /* Return the number of bits set in VALUE. */
/* Population count: repeatedly clears the least-significant set bit
   while incrementing COUNT.  NOTE(review): the surrounding loop and the
   final return of COUNT are missing from this extraction (original
   lines 2272-2275 and beyond) -- only the accumulator setup and the
   bit-clearing step are visible here.  */
2269 bit_count (unsigned long value
)
2271 unsigned long count
= 0;
2276 value
&= value
- 1; /* Clear the least-significant set bit. */
2282 /* Return the number of features in feature-set SET. */
/* An arm_feature_set holds two feature words (CPU1 and CPU2); the total
   feature count is the sum of the popcounts of both words, computed via
   bit_count above.  */
2284 feature_count (const arm_feature_set
* set
)
2286 return (bit_count (ARM_FSET_CPU1 (*set
))
2287 + bit_count (ARM_FSET_CPU2 (*set
)));
2294 } arm_fixed_mode_set
;
2296 /* A small helper for setting fixed-point library libfuncs. */
/* Registers a "__gnu_<funcname><modename>" (or, when NUM_SUFFIX is
   nonzero, "__gnu_<funcname><modename><num_suffix>") libcall name for
   OPTABLE in MODE.  NOTE(review): the declaration of the local BUFFER
   and the trailing parameter(s) of the signature are missing from this
   extraction -- the sprintf targets a buffer declared on a dropped
   line.  */
2299 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2300 const char *funcname
, const char *modename
,
2305 if (num_suffix
== 0)
2306 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2308 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2310 set_optab_libfunc (optable
, mode
, buffer
);
2314 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2315 machine_mode from
, const char *funcname
,
2316 const char *toname
, const char *fromname
)
2319 const char *maybe_suffix_2
= "";
2321 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2322 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2323 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2324 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2325 maybe_suffix_2
= "2";
2327 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2330 set_conv_libfunc (optable
, to
, from
, buffer
);
2333 /* Set up library functions unique to ARM. */
2336 arm_init_libfuncs (void)
2338 /* For Linux, we have access to kernel support for atomic operations. */
2339 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2340 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2342 /* There are no special library functions unless we are using the
2347 /* The functions below are described in Section 4 of the "Run-Time
2348 ABI for the ARM architecture", Version 1.0. */
2350 /* Double-precision floating-point arithmetic. Table 2. */
2351 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2352 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2353 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2354 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2355 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2357 /* Double-precision comparisons. Table 3. */
2358 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2359 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2360 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2361 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2362 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2363 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2364 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2366 /* Single-precision floating-point arithmetic. Table 4. */
2367 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2368 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2369 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2370 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2371 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2373 /* Single-precision comparisons. Table 5. */
2374 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2375 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2376 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2377 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2378 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2379 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2380 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2382 /* Floating-point to integer conversions. Table 6. */
2383 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2384 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2385 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2386 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2387 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2388 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2389 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2390 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2392 /* Conversions between floating types. Table 7. */
2393 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2394 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2396 /* Integer to floating-point conversions. Table 8. */
2397 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2398 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2399 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2400 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2401 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2402 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2403 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2404 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2406 /* Long long. Table 9. */
2407 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2408 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2409 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2410 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2411 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2412 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2413 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2414 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2416 /* Integer (32/32->32) division. \S 4.3.1. */
2417 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2418 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2420 /* The divmod functions are designed so that they can be used for
2421 plain division, even though they return both the quotient and the
2422 remainder. The quotient is returned in the usual location (i.e.,
2423 r0 for SImode, {r0, r1} for DImode), just as would be expected
2424 for an ordinary division routine. Because the AAPCS calling
2425 conventions specify that all of { r0, r1, r2, r3 } are
2426 callee-saved registers, there is no need to tell the compiler
2427 explicitly that those registers are clobbered by these
2429 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2430 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2432 /* For SImode division the ABI provides div-without-mod routines,
2433 which are faster. */
2434 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2435 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2437 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2438 divmod libcalls instead. */
2439 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2440 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2441 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2442 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2444 /* Half-precision float operations. The compiler handles all operations
2445 with NULL libfuncs by converting the SFmode. */
2446 switch (arm_fp16_format
)
2448 case ARM_FP16_FORMAT_IEEE
:
2449 case ARM_FP16_FORMAT_ALTERNATIVE
:
2452 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2453 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2455 : "__gnu_f2h_alternative"));
2456 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2457 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2459 : "__gnu_h2f_alternative"));
2462 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2463 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2464 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2465 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2466 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2469 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2470 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2471 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2472 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2473 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2474 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2475 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2482 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2484 const arm_fixed_mode_set fixed_arith_modes
[] =
2505 const arm_fixed_mode_set fixed_conv_modes
[] =
2535 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2537 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2538 "add", fixed_arith_modes
[i
].name
, 3);
2539 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2540 "ssadd", fixed_arith_modes
[i
].name
, 3);
2541 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2542 "usadd", fixed_arith_modes
[i
].name
, 3);
2543 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2544 "sub", fixed_arith_modes
[i
].name
, 3);
2545 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2546 "sssub", fixed_arith_modes
[i
].name
, 3);
2547 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2548 "ussub", fixed_arith_modes
[i
].name
, 3);
2549 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2550 "mul", fixed_arith_modes
[i
].name
, 3);
2551 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2552 "ssmul", fixed_arith_modes
[i
].name
, 3);
2553 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2554 "usmul", fixed_arith_modes
[i
].name
, 3);
2555 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2556 "div", fixed_arith_modes
[i
].name
, 3);
2557 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2558 "udiv", fixed_arith_modes
[i
].name
, 3);
2559 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2560 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2561 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2562 "usdiv", fixed_arith_modes
[i
].name
, 3);
2563 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2564 "neg", fixed_arith_modes
[i
].name
, 2);
2565 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2566 "ssneg", fixed_arith_modes
[i
].name
, 2);
2567 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2568 "usneg", fixed_arith_modes
[i
].name
, 2);
2569 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2570 "ashl", fixed_arith_modes
[i
].name
, 3);
2571 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2572 "ashr", fixed_arith_modes
[i
].name
, 3);
2573 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2574 "lshr", fixed_arith_modes
[i
].name
, 3);
2575 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2576 "ssashl", fixed_arith_modes
[i
].name
, 3);
2577 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2578 "usashl", fixed_arith_modes
[i
].name
, 3);
2579 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2580 "cmp", fixed_arith_modes
[i
].name
, 2);
2583 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2584 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2587 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2588 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2591 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2592 fixed_conv_modes
[j
].mode
, "fract",
2593 fixed_conv_modes
[i
].name
,
2594 fixed_conv_modes
[j
].name
);
2595 arm_set_fixed_conv_libfunc (satfract_optab
,
2596 fixed_conv_modes
[i
].mode
,
2597 fixed_conv_modes
[j
].mode
, "satfract",
2598 fixed_conv_modes
[i
].name
,
2599 fixed_conv_modes
[j
].name
);
2600 arm_set_fixed_conv_libfunc (fractuns_optab
,
2601 fixed_conv_modes
[i
].mode
,
2602 fixed_conv_modes
[j
].mode
, "fractuns",
2603 fixed_conv_modes
[i
].name
,
2604 fixed_conv_modes
[j
].name
);
2605 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2606 fixed_conv_modes
[i
].mode
,
2607 fixed_conv_modes
[j
].mode
, "satfractuns",
2608 fixed_conv_modes
[i
].name
,
2609 fixed_conv_modes
[j
].name
);
2613 if (TARGET_AAPCS_BASED
)
2614 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2617 /* On AAPCS systems, this is the "struct __va_list". */
2618 static GTY(()) tree va_list_type
;
2620 /* Return the type to use as __builtin_va_list. */
2622 arm_build_builtin_va_list (void)
2627 if (!TARGET_AAPCS_BASED
)
2628 return std_build_builtin_va_list ();
2630 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2638 The C Library ABI further reinforces this definition in \S
2641 We must follow this definition exactly. The structure tag
2642 name is visible in C++ mangled names, and thus forms a part
2643 of the ABI. The field name may be used by people who
2644 #include <stdarg.h>. */
2645 /* Create the type. */
2646 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2647 /* Give it the required name. */
2648 va_list_name
= build_decl (BUILTINS_LOCATION
,
2650 get_identifier ("__va_list"),
2652 DECL_ARTIFICIAL (va_list_name
) = 1;
2653 TYPE_NAME (va_list_type
) = va_list_name
;
2654 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2655 /* Create the __ap field. */
2656 ap_field
= build_decl (BUILTINS_LOCATION
,
2658 get_identifier ("__ap"),
2660 DECL_ARTIFICIAL (ap_field
) = 1;
2661 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2662 TYPE_FIELDS (va_list_type
) = ap_field
;
2663 /* Compute its layout. */
2664 layout_type (va_list_type
);
2666 return va_list_type
;
2669 /* Return an expression of type "void *" pointing to the next
2670 available argument in a variable-argument list. VALIST is the
2671 user-level va_list object, of type __builtin_va_list. */
/* Error trees are propagated unchanged.  On AAPCS targets the actual
   pointer lives in the single __ap field of "struct __va_list", so the
   function rewrites VALIST into a COMPONENT_REF selecting that field;
   on non-AAPCS targets VALIST is presumably returned as-is (the final
   return statement is missing from this extraction -- confirm against
   the original source).  */
2673 arm_extract_valist_ptr (tree valist
)
2675 if (TREE_TYPE (valist
) == error_mark_node
)
2676 return error_mark_node
;
2678 /* On an AAPCS target, the pointer is stored within "struct
2680 if (TARGET_AAPCS_BASED
)
2682 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2683 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2684 valist
, ap_field
, NULL_TREE
);
2690 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
/* Unwraps the AAPCS struct __va_list (if any) via
   arm_extract_valist_ptr, then delegates to the generic
   std_expand_builtin_va_start.  */
2692 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2694 valist
= arm_extract_valist_ptr (valist
);
2695 std_expand_builtin_va_start (valist
, nextarg
);
2698 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
/* Same pattern as va_start above: unwrap the AAPCS va_list wrapper,
   then defer to the generic std_gimplify_va_arg_expr.  The POST_P
   parameter is declared on a line dropped by the extraction.  */
2700 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2703 valist
= arm_extract_valist_ptr (valist
);
2704 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2707 /* Check any incompatible options that the user has specified. */
2709 arm_option_check_internal (struct gcc_options
*opts
)
2711 int flags
= opts
->x_target_flags
;
2713 /* Make sure that the processor choice does not conflict with any of the
2714 other command line choices. */
2715 if (TARGET_ARM_P (flags
) && !ARM_FSET_HAS_CPU1 (insn_flags
, FL_NOTM
))
2716 error ("target CPU does not support ARM mode");
2718 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2719 from here where no function is being compiled currently. */
2720 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2721 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2723 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2724 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2726 /* If this target is normally configured to use APCS frames, warn if they
2727 are turned off and debugging is turned on. */
2728 if (TARGET_ARM_P (flags
)
2729 && write_symbols
!= NO_DEBUG
2730 && !TARGET_APCS_FRAME
2731 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2732 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2734 /* iWMMXt unsupported under Thumb mode. */
2735 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2736 error ("iWMMXt unsupported under Thumb mode");
2738 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2739 error ("can not use -mtp=cp15 with 16-bit Thumb");
2741 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2743 error ("RTP PIC is incompatible with Thumb");
2747 /* We only support -mslow-flash-data on armv7-m targets. */
2748 if (target_slow_flash_data
2749 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2750 || (TARGET_THUMB1_P (flags
) || flag_pic
|| TARGET_NEON
)))
2751 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2754 /* Recompute the global settings depending on target attribute options. */
2757 arm_option_params_internal (void)
2759 /* If we are not using the default (ARM mode) section anchor offset
2760 ranges, then set the correct ranges now. */
2763 /* Thumb-1 LDR instructions cannot have negative offsets.
2764 Permissible positive offset ranges are 5-bit (for byte loads),
2765 6-bit (for halfword loads), or 7-bit (for word loads).
2766 Empirical results suggest a 7-bit anchor range gives the best
2767 overall code size. */
2768 targetm
.min_anchor_offset
= 0;
2769 targetm
.max_anchor_offset
= 127;
2771 else if (TARGET_THUMB2
)
2773 /* The minimum is set such that the total size of the block
2774 for a particular anchor is 248 + 1 + 4095 bytes, which is
2775 divisible by eight, ensuring natural spacing of anchors. */
2776 targetm
.min_anchor_offset
= -248;
2777 targetm
.max_anchor_offset
= 4095;
2781 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2782 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2787 /* If optimizing for size, bump the number of instructions that we
2788 are prepared to conditionally execute (even on a StrongARM). */
2789 max_insns_skipped
= 6;
2791 /* For THUMB2, we limit the conditional sequence to one IT block. */
2793 max_insns_skipped
= arm_restrict_it
? 1 : 4;
2796 /* When -mrestrict-it is in use tone down the if-conversion. */
2797 max_insns_skipped
= (TARGET_THUMB2
&& arm_restrict_it
)
2798 ? 1 : current_tune
->max_insns_skipped
;
2801 /* True if -mflip-thumb should next add an attribute for the default
2802 mode, false if it should next add an attribute for the opposite mode. */
2803 static GTY(()) bool thumb_flipper
;
2805 /* Options after initial target override. */
2806 static GTY(()) tree init_optimize
;
/* Worker for arm_override_options_after_change: if no explicit function
   alignment was requested (x_align_functions <= 0), default to 2 bytes
   when compiling Thumb code and optimizing for size, else 4 bytes.  */
2809 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2811 if (opts
->x_align_functions
<= 0)
2812 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2813 && opts
->x_optimize_size
? 2 : 4;
2816 /* Implement targetm.override_options_after_change. */
/* Thin wrapper applying the worker above to the global option set.  */
2819 arm_override_options_after_change (void)
2821 arm_override_options_after_change_1 (&global_options
);
2824 /* Reset options between modes that the user has specified. */
2826 arm_option_override_internal (struct gcc_options
*opts
,
2827 struct gcc_options
*opts_set
)
2829 arm_override_options_after_change_1 (opts
);
2831 if (TARGET_THUMB_P (opts
->x_target_flags
)
2832 && !(ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
)))
2834 warning (0, "target CPU does not support THUMB instructions");
2835 opts
->x_target_flags
&= ~MASK_THUMB
;
2838 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2840 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2841 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2844 /* Callee super interworking implies thumb interworking. Adding
2845 this to the flags here simplifies the logic elsewhere. */
2846 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2847 opts
->x_target_flags
|= MASK_INTERWORK
;
2849 /* need to remember initial values so combinaisons of options like
2850 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2851 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2853 if (! opts_set
->x_arm_restrict_it
)
2854 opts
->x_arm_restrict_it
= arm_arch8
;
2856 if (!TARGET_THUMB2_P (opts
->x_target_flags
))
2857 opts
->x_arm_restrict_it
= 0;
2859 /* Don't warn since it's on by default in -O2. */
2860 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2861 opts
->x_flag_schedule_insns
= 0;
2863 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
2865 /* Disable shrink-wrap when optimizing function for size, since it tends to
2866 generate additional returns. */
2867 if (optimize_function_for_size_p (cfun
)
2868 && TARGET_THUMB2_P (opts
->x_target_flags
))
2869 opts
->x_flag_shrink_wrap
= false;
2871 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
2873 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2874 - epilogue_insns - does not accurately model the corresponding insns
2875 emitted in the asm file. In particular, see the comment in thumb_exit
2876 'Find out how many of the (return) argument registers we can corrupt'.
2877 As a consequence, the epilogue may clobber registers without fipa-ra
2878 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2879 TODO: Accurately model clobbers for epilogue_insns and reenable
2881 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2882 opts
->x_flag_ipa_ra
= 0;
2884 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
2886 /* Thumb2 inline assembly code should always use unified syntax.
2887 This will apply to ARM and Thumb1 eventually. */
2888 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
2891 /* Fix up any incompatible options that the user has specified. */
2893 arm_option_override (void)
2895 arm_selected_arch
= NULL
;
2896 arm_selected_cpu
= NULL
;
2897 arm_selected_tune
= NULL
;
2899 if (global_options_set
.x_arm_arch_option
)
2900 arm_selected_arch
= &all_architectures
[arm_arch_option
];
2902 if (global_options_set
.x_arm_cpu_option
)
2904 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
2905 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
2908 if (global_options_set
.x_arm_tune_option
)
2909 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
2911 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2912 SUBTARGET_OVERRIDE_OPTIONS
;
2915 if (arm_selected_arch
)
2917 if (arm_selected_cpu
)
2919 const arm_feature_set tuning_flags
= ARM_FSET_MAKE_CPU1 (FL_TUNE
);
2920 arm_feature_set selected_flags
;
2921 ARM_FSET_XOR (selected_flags
, arm_selected_cpu
->flags
,
2922 arm_selected_arch
->flags
);
2923 ARM_FSET_EXCLUDE (selected_flags
, selected_flags
, tuning_flags
);
2924 /* Check for conflict between mcpu and march. */
2925 if (!ARM_FSET_IS_EMPTY (selected_flags
))
2927 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2928 arm_selected_cpu
->name
, arm_selected_arch
->name
);
2929 /* -march wins for code generation.
2930 -mcpu wins for default tuning. */
2931 if (!arm_selected_tune
)
2932 arm_selected_tune
= arm_selected_cpu
;
2934 arm_selected_cpu
= arm_selected_arch
;
2938 arm_selected_arch
= NULL
;
2941 /* Pick a CPU based on the architecture. */
2942 arm_selected_cpu
= arm_selected_arch
;
2945 /* If the user did not specify a processor, choose one for them. */
2946 if (!arm_selected_cpu
)
2948 const struct processors
* sel
;
2949 arm_feature_set sought
= ARM_FSET_EMPTY
;;
2951 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
2952 if (!arm_selected_cpu
->name
)
2954 #ifdef SUBTARGET_CPU_DEFAULT
2955 /* Use the subtarget default CPU if none was specified by
2957 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
2959 /* Default to ARM6. */
2960 if (!arm_selected_cpu
->name
)
2961 arm_selected_cpu
= &all_cores
[arm6
];
2964 sel
= arm_selected_cpu
;
2965 insn_flags
= sel
->flags
;
2967 /* Now check to see if the user has specified some command line
2968 switch that require certain abilities from the cpu. */
2970 if (TARGET_INTERWORK
|| TARGET_THUMB
)
2972 ARM_FSET_ADD_CPU1 (sought
, FL_THUMB
);
2973 ARM_FSET_ADD_CPU1 (sought
, FL_MODE32
);
2975 /* There are no ARM processors that support both APCS-26 and
2976 interworking. Therefore we force FL_MODE26 to be removed
2977 from insn_flags here (if it was set), so that the search
2978 below will always be able to find a compatible processor. */
2979 ARM_FSET_DEL_CPU1 (insn_flags
, FL_MODE26
);
2982 if (!ARM_FSET_IS_EMPTY (sought
)
2983 && !(ARM_FSET_CPU_SUBSET (sought
, insn_flags
)))
2985 /* Try to locate a CPU type that supports all of the abilities
2986 of the default CPU, plus the extra abilities requested by
2988 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
2989 if (ARM_FSET_CPU_SUBSET (sought
, sel
->flags
))
2992 if (sel
->name
== NULL
)
2994 unsigned current_bit_count
= 0;
2995 const struct processors
* best_fit
= NULL
;
2997 /* Ideally we would like to issue an error message here
2998 saying that it was not possible to find a CPU compatible
2999 with the default CPU, but which also supports the command
3000 line options specified by the programmer, and so they
3001 ought to use the -mcpu=<name> command line option to
3002 override the default CPU type.
3004 If we cannot find a cpu that has both the
3005 characteristics of the default cpu and the given
3006 command line options we scan the array again looking
3007 for a best match. */
3008 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3010 arm_feature_set required
= ARM_FSET_EMPTY
;
3011 ARM_FSET_UNION (required
, sought
, insn_flags
);
3012 if (ARM_FSET_CPU_SUBSET (required
, sel
->flags
))
3015 arm_feature_set flags
;
3016 ARM_FSET_INTER (flags
, sel
->flags
, insn_flags
);
3017 count
= feature_count (&flags
);
3019 if (count
>= current_bit_count
)
3022 current_bit_count
= count
;
3026 gcc_assert (best_fit
);
3030 arm_selected_cpu
= sel
;
3034 gcc_assert (arm_selected_cpu
);
3035 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3036 if (!arm_selected_tune
)
3037 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
3039 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
3040 insn_flags
= arm_selected_cpu
->flags
;
3041 arm_base_arch
= arm_selected_cpu
->base_arch
;
3043 arm_tune
= arm_selected_tune
->core
;
3044 tune_flags
= arm_selected_tune
->flags
;
3045 current_tune
= arm_selected_tune
->tune
;
3047 /* TBD: Dwarf info for apcs frame is not handled yet. */
3048 if (TARGET_APCS_FRAME
)
3049 flag_shrink_wrap
= false;
3051 /* BPABI targets use linker tricks to allow interworking on cores
3052 without thumb support. */
3053 if (TARGET_INTERWORK
3054 && !(ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
) || TARGET_BPABI
))
3056 warning (0, "target CPU does not support interworking" );
3057 target_flags
&= ~MASK_INTERWORK
;
3060 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3062 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3063 target_flags
|= MASK_APCS_FRAME
;
3066 if (TARGET_POKE_FUNCTION_NAME
)
3067 target_flags
|= MASK_APCS_FRAME
;
3069 if (TARGET_APCS_REENT
&& flag_pic
)
3070 error ("-fpic and -mapcs-reent are incompatible");
3072 if (TARGET_APCS_REENT
)
3073 warning (0, "APCS reentrant code not supported. Ignored");
3075 if (TARGET_APCS_FLOAT
)
3076 warning (0, "passing floating point arguments in fp regs not yet supported");
3078 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3079 arm_arch3m
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH3M
);
3080 arm_arch4
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH4
);
3081 arm_arch4t
= arm_arch4
&& (ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
));
3082 arm_arch5
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH5
);
3083 arm_arch5e
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH5E
);
3084 arm_arch6
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6
);
3085 arm_arch6k
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6K
);
3086 arm_arch6kz
= arm_arch6k
&& ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6KZ
);
3087 arm_arch_notm
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_NOTM
);
3088 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3089 arm_arch7
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH7
);
3090 arm_arch7em
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH7EM
);
3091 arm_arch8
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH8
);
3092 arm_arch_thumb2
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB2
);
3093 arm_arch_xscale
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_XSCALE
);
3095 arm_ld_sched
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_LDSCHED
);
3096 arm_tune_strongarm
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_STRONG
);
3097 arm_tune_wbuf
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_WBUF
);
3098 arm_tune_xscale
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_XSCALE
);
3099 arm_arch_iwmmxt
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_IWMMXT
);
3100 arm_arch_iwmmxt2
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_IWMMXT2
);
3101 arm_arch_thumb_hwdiv
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB_DIV
);
3102 arm_arch_arm_hwdiv
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARM_DIV
);
3103 arm_arch_no_volatile_ce
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_NO_VOLATILE_CE
);
3104 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
3105 arm_arch_crc
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_CRC32
);
3106 arm_m_profile_small_mul
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_SMALLMUL
);
3108 /* V5 code we generate is completely interworking capable, so we turn off
3109 TARGET_INTERWORK here to avoid many tests later on. */
3111 /* XXX However, we must pass the right pre-processor defines to CPP
3112 or GLD can get confused. This is a hack. */
3113 if (TARGET_INTERWORK
)
3114 arm_cpp_interwork
= 1;
3117 target_flags
&= ~MASK_INTERWORK
;
3119 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3120 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3122 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3123 error ("iwmmxt abi requires an iwmmxt capable cpu");
3125 if (!global_options_set
.x_arm_fpu_index
)
3127 const char *target_fpu_name
;
3130 #ifdef FPUTYPE_DEFAULT
3131 target_fpu_name
= FPUTYPE_DEFAULT
;
3133 target_fpu_name
= "vfp";
3136 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
3141 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
3143 switch (arm_fpu_desc
->model
)
3145 case ARM_FP_MODEL_VFP
:
3146 arm_fpu_attr
= FPU_VFP
;
3153 if (TARGET_AAPCS_BASED
)
3155 if (TARGET_CALLER_INTERWORKING
)
3156 error ("AAPCS does not support -mcaller-super-interworking");
3158 if (TARGET_CALLEE_INTERWORKING
)
3159 error ("AAPCS does not support -mcallee-super-interworking");
3162 /* iWMMXt and NEON are incompatible. */
3163 if (TARGET_IWMMXT
&& TARGET_NEON
)
3164 error ("iWMMXt and NEON are incompatible");
3166 /* __fp16 support currently assumes the core has ldrh. */
3167 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3168 sorry ("__fp16 and no ldrh");
3170 /* If soft-float is specified then don't use FPU. */
3171 if (TARGET_SOFT_FLOAT
)
3172 arm_fpu_attr
= FPU_NONE
;
3174 if (TARGET_AAPCS_BASED
)
3176 if (arm_abi
== ARM_ABI_IWMMXT
)
3177 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3178 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
3179 && TARGET_HARD_FLOAT
3181 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3183 arm_pcs_default
= ARM_PCS_AAPCS
;
3187 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
3188 sorry ("-mfloat-abi=hard and VFP");
3190 if (arm_abi
== ARM_ABI_APCS
)
3191 arm_pcs_default
= ARM_PCS_APCS
;
3193 arm_pcs_default
= ARM_PCS_ATPCS
;
3196 /* For arm2/3 there is no need to do any scheduling if we are doing
3197 software floating-point. */
3198 if (TARGET_SOFT_FLOAT
&& !ARM_FSET_HAS_CPU1 (tune_flags
, FL_MODE32
))
3199 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3201 /* Use the cp15 method if it is available. */
3202 if (target_thread_pointer
== TP_AUTO
)
3204 if (arm_arch6k
&& !TARGET_THUMB1
)
3205 target_thread_pointer
= TP_CP15
;
3207 target_thread_pointer
= TP_SOFT
;
3210 /* Override the default structure alignment for AAPCS ABI. */
3211 if (!global_options_set
.x_arm_structure_size_boundary
)
3213 if (TARGET_AAPCS_BASED
)
3214 arm_structure_size_boundary
= 8;
3218 if (arm_structure_size_boundary
!= 8
3219 && arm_structure_size_boundary
!= 32
3220 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3222 if (ARM_DOUBLEWORD_ALIGN
)
3224 "structure size boundary can only be set to 8, 32 or 64");
3226 warning (0, "structure size boundary can only be set to 8 or 32");
3227 arm_structure_size_boundary
3228 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3232 /* If stack checking is disabled, we can use r10 as the PIC register,
3233 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3234 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3236 if (TARGET_VXWORKS_RTP
)
3237 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3238 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3241 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3242 arm_pic_register
= 9;
3244 if (arm_pic_register_string
!= NULL
)
3246 int pic_register
= decode_reg_name (arm_pic_register_string
);
3249 warning (0, "-mpic-register= is useless without -fpic");
3251 /* Prevent the user from choosing an obviously stupid PIC register. */
3252 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3253 || pic_register
== HARD_FRAME_POINTER_REGNUM
3254 || pic_register
== STACK_POINTER_REGNUM
3255 || pic_register
>= PC_REGNUM
3256 || (TARGET_VXWORKS_RTP
3257 && (unsigned int) pic_register
!= arm_pic_register
))
3258 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3260 arm_pic_register
= pic_register
;
3263 if (TARGET_VXWORKS_RTP
3264 && !global_options_set
.x_arm_pic_data_is_text_relative
)
3265 arm_pic_data_is_text_relative
= 0;
3267 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3268 if (fix_cm3_ldrd
== 2)
3270 if (arm_selected_cpu
->core
== cortexm3
)
3276 /* Enable -munaligned-access by default for
3277 - all ARMv6 architecture-based processors
3278 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3279 - ARMv8 architecture-base processors.
3281 Disable -munaligned-access by default for
3282 - all pre-ARMv6 architecture-based processors
3283 - ARMv6-M architecture-based processors. */
3285 if (unaligned_access
== 2)
3287 if (arm_arch6
&& (arm_arch_notm
|| arm_arch7
))
3288 unaligned_access
= 1;
3290 unaligned_access
= 0;
3292 else if (unaligned_access
== 1
3293 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
3295 warning (0, "target CPU does not support unaligned accesses");
3296 unaligned_access
= 0;
3299 /* Hot/Cold partitioning is not currently supported, since we can't
3300 handle literal pool placement in that case. */
3301 if (flag_reorder_blocks_and_partition
)
3303 inform (input_location
,
3304 "-freorder-blocks-and-partition not supported on this architecture");
3305 flag_reorder_blocks_and_partition
= 0;
3306 flag_reorder_blocks
= 1;
3310 /* Hoisting PIC address calculations more aggressively provides a small,
3311 but measurable, size reduction for PIC code. Therefore, we decrease
3312 the bar for unrestricted expression hoisting to the cost of PIC address
3313 calculation, which is 2 instructions. */
3314 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3315 global_options
.x_param_values
,
3316 global_options_set
.x_param_values
);
3318 /* ARM EABI defaults to strict volatile bitfields. */
3319 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3320 && abi_version_at_least(2))
3321 flag_strict_volatile_bitfields
= 1;
3323 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3324 have deemed it beneficial (signified by setting
3325 prefetch.num_slots to 1 or more). */
3326 if (flag_prefetch_loop_arrays
< 0
3329 && current_tune
->prefetch
.num_slots
> 0)
3330 flag_prefetch_loop_arrays
= 1;
3332 /* Set up parameters to be used in prefetching algorithm. Do not
3333 override the defaults unless we are tuning for a core we have
3334 researched values for. */
3335 if (current_tune
->prefetch
.num_slots
> 0)
3336 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3337 current_tune
->prefetch
.num_slots
,
3338 global_options
.x_param_values
,
3339 global_options_set
.x_param_values
);
3340 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3341 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3342 current_tune
->prefetch
.l1_cache_line_size
,
3343 global_options
.x_param_values
,
3344 global_options_set
.x_param_values
);
3345 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3346 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3347 current_tune
->prefetch
.l1_cache_size
,
3348 global_options
.x_param_values
,
3349 global_options_set
.x_param_values
);
3351 /* Use Neon to perform 64-bits operations rather than core
3353 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3354 if (use_neon_for_64bits
== 1)
3355 prefer_neon_for_64bits
= true;
3357 /* Use the alternative scheduling-pressure algorithm by default. */
3358 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3359 global_options
.x_param_values
,
3360 global_options_set
.x_param_values
);
3362 /* Look through ready list and all of queue for instructions
3363 relevant for L2 auto-prefetcher. */
3364 int param_sched_autopref_queue_depth
;
3366 switch (current_tune
->sched_autopref
)
3368 case tune_params::SCHED_AUTOPREF_OFF
:
3369 param_sched_autopref_queue_depth
= -1;
3372 case tune_params::SCHED_AUTOPREF_RANK
:
3373 param_sched_autopref_queue_depth
= 0;
3376 case tune_params::SCHED_AUTOPREF_FULL
:
3377 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3384 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3385 param_sched_autopref_queue_depth
,
3386 global_options
.x_param_values
,
3387 global_options_set
.x_param_values
);
3389 /* Currently, for slow flash data, we just disable literal pools. */
3390 if (target_slow_flash_data
)
3391 arm_disable_literal_pool
= true;
3393 /* Disable scheduling fusion by default if it's not armv7 processor
3394 or doesn't prefer ldrd/strd. */
3395 if (flag_schedule_fusion
== 2
3396 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3397 flag_schedule_fusion
= 0;
3399 /* Need to remember initial options before they are overriden. */
3400 init_optimize
= build_optimization_node (&global_options
);
3402 arm_option_override_internal (&global_options
, &global_options_set
);
3403 arm_option_check_internal (&global_options
);
3404 arm_option_params_internal ();
3406 /* Register global variables with the garbage collector. */
3407 arm_add_gc_roots ();
3409 /* Save the initial options in case the user does function specific
3411 target_option_default_node
= target_option_current_node
3412 = build_target_option_node (&global_options
);
3414 /* Init initial mode for testing. */
3415 thumb_flipper
= TARGET_THUMB
;
/* Register this backend's GC roots and set up the obstack used by the
   minipool (constant pool) machinery; minipool_startobj marks the base
   of that obstack for later release.
   NOTE(review): this extraction is missing lines relative to the fused
   original numbering (the "static void" line, braces) -- confirm
   against upstream arm.c before compiling.  */
3419 arm_add_gc_roots (void)
3421 gcc_obstack_init(&minipool_obstack
);
3422 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3425 /* A table of known ARM exception types.
3426 For use with the interrupt function attribute. */
/* Each entry maps an attribute-argument string to the ARM_FT_* function
   type bits returned by arm_isr_value.
   NOTE(review): the "typedef struct isr_attribute_arg" header line is
   missing from this extraction; only the two fields are visible.  */
3430 const char *const arg
;
3431 const unsigned long return_value
;
/* Lookup table scanned linearly by arm_isr_value; terminated by the
   NULL-named sentinel entry.  The duplicated "ABORT"/"abort" rows are
   redundant but harmless: the first match wins.  */
3435 static const isr_attribute_arg isr_attribute_args
[] =
3437 { "IRQ", ARM_FT_ISR
},
3438 { "irq", ARM_FT_ISR
},
3439 { "FIQ", ARM_FT_FIQ
},
3440 { "fiq", ARM_FT_FIQ
},
3441 { "ABORT", ARM_FT_ISR
},
3442 { "abort", ARM_FT_ISR
},
3443 { "ABORT", ARM_FT_ISR
},
3444 { "abort", ARM_FT_ISR
},
3445 { "UNDEF", ARM_FT_EXCEPTION
},
3446 { "undef", ARM_FT_EXCEPTION
},
3447 { "SWI", ARM_FT_EXCEPTION
},
3448 { "swi", ARM_FT_EXCEPTION
},
/* Sentinel: terminates the scan in arm_isr_value.  */
3449 { NULL
, ARM_FT_NORMAL
}
3452 /* Returns the (interrupt) function type of the current
3453 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
/* ARGUMENT is the TREE_VALUE list attached to an "isr"/"interrupt"
   attribute; its first value, if any, must be a string constant that is
   matched against isr_attribute_args above.
   NOTE(review): lines are missing from this extraction (braces and the
   condition guarding the early ARM_FT_NORMAL|ARM_FT_STACKALIGN return,
   plus the return for the no-argument case) -- confirm against
   upstream arm.c.  */
3455 static unsigned long
3456 arm_isr_value (tree argument
)
3458 const isr_attribute_arg
* ptr
;
3462 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3464 /* No argument - default to IRQ. */
3465 if (argument
== NULL_TREE
)
3468 /* Get the value of the argument. */
/* Reject a missing or non-string attribute value.  */
3469 if (TREE_VALUE (argument
) == NULL_TREE
3470 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3471 return ARM_FT_UNKNOWN
;
3473 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3475 /* Check it against the list of known arguments. */
/* Linear scan of the sentinel-terminated table; first match wins.  */
3476 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3477 if (streq (arg
, ptr
->arg
))
3478 return ptr
->return_value
;
3480 /* An unrecognized interrupt type. */
3481 return ARM_FT_UNKNOWN
;
3484 /* Computes the type of the current function. */
/* Builds an ARM_FT_* bitmask for current_function_decl: volatility
   (noreturn), nested (static chain), naked, interworked/normal base
   type, and any isr/interrupt attribute value via arm_isr_value.
   NOTE(review): several lines are missing from this extraction (braces,
   the "volatile_func" condition head, the attr/a declarations, the
   checks around the attribute lookups) -- confirm against upstream.  */
3486 static unsigned long
3487 arm_compute_func_type (void)
3489 unsigned long type
= ARM_FT_UNKNOWN
;
3493 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3495 /* Decide if the current function is volatile. Such functions
3496 never return, and many memory cycles can be saved by not storing
3497 register values that will never be needed again. This optimization
3498 was added to speed up context switching in a kernel application. */
3500 && (TREE_NOTHROW (current_function_decl
)
3501 || !(flag_unwind_tables
3503 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3504 && TREE_THIS_VOLATILE (current_function_decl
))
3505 type
|= ARM_FT_VOLATILE
;
/* A static chain means the function is nested.  */
3507 if (cfun
->static_chain_decl
!= NULL
)
3508 type
|= ARM_FT_NESTED
;
3510 attr
= DECL_ATTRIBUTES (current_function_decl
);
3512 a
= lookup_attribute ("naked", attr
);
3514 type
|= ARM_FT_NAKED
;
/* "isr" and "interrupt" are synonymous; the second lookup is
   presumably a fallback when the first finds nothing -- the guard
   line is missing from this view.  */
3516 a
= lookup_attribute ("isr", attr
);
3518 a
= lookup_attribute ("interrupt", attr
);
/* No interrupt attribute: base type depends on interworking.  */
3521 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3523 type
|= arm_isr_value (TREE_VALUE (a
));
3528 /* Returns the type of the current function. */
/* Memoizing accessor: computes the ARM_FT_* mask once per function and
   caches it in cfun->machine->func_type.
   NOTE(review): the return-type line and braces are missing from this
   extraction -- confirm against upstream arm.c.  */
3531 arm_current_func_type (void)
/* Cache miss: ARM_FT_UNKNOWN marks "not yet computed".  */
3533 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3534 cfun
->machine
->func_type
= arm_compute_func_type ();
3536 return cfun
->machine
->func_type
;
/* Target hook: whether incoming arguments may be spilled to stack
   slots.  Naked functions have no prologue, so no slots for them.
   NOTE(review): the return-type line and braces are missing from this
   extraction.  */
3540 arm_allocate_stack_slots_for_args (void)
3542 /* Naked functions should not allocate stack slots for arguments. */
3543 return !IS_NAKED (arm_current_func_type ());
/* Target hook: whether -Wreturn-type style warnings apply to DECL.
   Returns false (suppress) for functions carrying the "naked"
   attribute.
   NOTE(review): the return-type line and braces are missing from this
   extraction.  */
3547 arm_warn_func_return (tree decl
)
3549 /* Naked functions are implemented entirely in assembly, including the
3550 return sequence, so suppress warnings about this. */
3551 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3555 /* Output assembler code for a block containing the constant parts
3556 of a trampoline, leaving space for the variable parts.
3558 On the ARM, (if r8 is the static chain regnum, and remembering that
3559 referencing pc adds an offset of 8) the trampoline looks like:
3562 .word static chain value
3563 .word function's address
3564 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
/* Implements TARGET_ASM_TRAMPOLINE_TEMPLATE.  Emits three variants:
   ARM (pc-relative ldr pair), Thumb-2 (ldr.w pair), and 16-bit Thumb
   (push/pop stub).  The two trailing .word slots are filled in later
   by arm_trampoline_init.
   NOTE(review): the return-type line, braces, and the ARM-mode "if"
   head are missing from this extraction -- confirm against upstream.  */
3567 arm_asm_trampoline_template (FILE *f
)
3569 if (TARGET_UNIFIED_ASM
)
3570 fprintf (f
, "\t.syntax unified\n");
3572 fprintf (f
, "\t.syntax divided\n");
/* ARM-mode variant: load static chain and target via pc-relative
   loads (pc reads as address + 8 in ARM state).  */
3576 fprintf (f
, "\t.arm\n");
3577 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3578 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3580 else if (TARGET_THUMB2
)
3582 fprintf (f
, "\t.thumb\n");
3583 /* The Thumb-2 trampoline is similar to the arm implementation.
3584 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3585 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3586 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3587 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
/* 16-bit Thumb variant: pc-relative ldr cannot target high regs, so
   bounce the values through r0 and the stack.  */
3591 ASM_OUTPUT_ALIGN (f
, 2);
3592 fprintf (f
, "\t.code\t16\n");
3593 fprintf (f
, ".Ltrampoline_start:\n");
3594 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3595 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3596 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3597 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
/* Overwrite the saved r1 slot with the target address, then pop it
   straight into pc to branch there.  */
3598 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3599 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
/* Placeholder .word slots for the chain value and function address.  */
3601 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3602 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3605 /* Emit RTL insns to initialize the variable parts of a trampoline. */
/* Implements TARGET_TRAMPOLINE_INIT: copies the template emitted by
   arm_asm_trampoline_template into M_TRAMP, stores CHAIN_VALUE and
   FNDECL's address into the two data slots (at byte offsets 8/12 for
   32-bit code, 12/16 for 16-bit Thumb -- matching the template above),
   then calls __clear_cache over the trampoline so the new code is
   visible to the instruction stream.
   NOTE(review): the return-type line and braces are missing from this
   extraction.  */
3608 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3610 rtx fnaddr
, mem
, a_tramp
;
3612 emit_block_move (m_tramp
, assemble_trampoline_template (),
3613 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
/* Static chain slot.  */
3615 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3616 emit_move_insn (mem
, chain_value
);
/* Function address slot.  */
3618 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3619 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3620 emit_move_insn (mem
, fnaddr
);
/* Flush [a_tramp, a_tramp + TRAMPOLINE_SIZE) from the caches.  */
3622 a_tramp
= XEXP (m_tramp
, 0);
3623 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3624 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3625 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3628 /* Thumb trampolines should be entered in thumb mode, so set
3629 the bottom bit of the address. */
/* Implements TARGET_TRAMPOLINE_ADJUST_ADDRESS: ORs bit 0 into ADDR so
   an indirect branch enters the stub in Thumb state.
   NOTE(review): the return-type line, braces, the TARGET_THUMB guard,
   and the return statement are missing from this extraction.  */
3632 arm_trampoline_adjust_address (rtx addr
)
3635 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3636 NULL
, 0, OPTAB_LIB_WIDEN
);
3640 /* Return 1 if it is possible to return using a single instruction.
3641 If SIBLING is non-null, this is a test for a return before a sibling
3642 call. SIBLING is the call insn, so we can examine its register usage. */
/* A long series of disqualifying checks; each failed check returns 0
   (the "return 0;" lines are among those missing from this extraction,
   as are the return-type line, braces, and the regno declaration --
   confirm against upstream arm.c).  */
3645 use_return_insn (int iscond
, rtx sibling
)
3648 unsigned int func_type
;
3649 unsigned long saved_int_regs
;
3650 unsigned HOST_WIDE_INT stack_adjust
;
3651 arm_stack_offsets
*offsets
;
3653 /* Never use a return instruction before reload has run. */
3654 if (!reload_completed
)
3657 func_type
= arm_current_func_type ();
3659 /* Naked, volatile and stack alignment functions need special
3661 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3664 /* So do interrupt functions that use the frame pointer and Thumb
3665 interrupt functions. */
3666 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
/* When ldrd/strd is preferred and we aren't optimizing for size,
   presumably the epilogue is left for later scheduling -- the guarded
   body line is missing from this view.  */
3669 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3670 && !optimize_function_for_size_p (cfun
))
3673 offsets
= arm_get_frame_offsets ();
3674 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3676 /* As do variadic functions. */
3677 if (crtl
->args
.pretend_args_size
3678 || cfun
->machine
->uses_anonymous_args
3679 /* Or if the function calls __builtin_eh_return () */
3680 || crtl
->calls_eh_return
3681 /* Or if the function calls alloca */
3682 || cfun
->calls_alloca
3683 /* Or if there is a stack adjustment. However, if the stack pointer
3684 is saved on the stack, we can use a pre-incrementing stack load. */
3685 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3686 && stack_adjust
== 4))
3687 /* Or if the static chain register was saved above the frame, under the
3688 assumption that the stack pointer isn't saved on the stack. */
3689 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
3690 && arm_compute_static_chain_stack_bytes() != 0))
3693 saved_int_regs
= offsets
->saved_regs_mask
;
3695 /* Unfortunately, the insn
3697 ldmib sp, {..., sp, ...}
3699 triggers a bug on most SA-110 based devices, such that the stack
3700 pointer won't be correctly restored if the instruction takes a
3701 page fault. We work around this problem by popping r3 along with
3702 the other registers, since that is never slower than executing
3703 another instruction.
3705 We test for !arm_arch5 here, because code for any architecture
3706 less than this could potentially be run on one of the buggy
3708 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3710 /* Validate that r3 is a call-clobbered register (always true in
3711 the default abi) ... */
3712 if (!call_used_regs
[3])
3715 /* ... that it isn't being used for a return value ... */
3716 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3719 /* ... or for a tail-call argument ... */
/* Only reached when SIBLING is non-null (the guard line is missing
   from this view); the assert documents that expectation.  */
3722 gcc_assert (CALL_P (sibling
));
3724 if (find_regno_fusage (sibling
, USE
, 3))
3728 /* ... and that there are no call-saved registers in r0-r2
3729 (always true in the default ABI). */
3730 if (saved_int_regs
& 0x7)
3734 /* Can't be done if interworking with Thumb, and any registers have been
3736 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3739 /* On StrongARM, conditional returns are expensive if they aren't
3740 taken and multiple registers have been stacked. */
3741 if (iscond
&& arm_tune_strongarm
)
3743 /* Conditional return when just the LR is stored is a simple
3744 conditional-load instruction, that's not expensive. */
3745 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
/* PIC-register liveness check; the leading "if (flag_pic" line is
   missing from this extraction.  */
3749 && arm_pic_register
!= INVALID_REGNUM
3750 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3754 /* If there are saved registers but the LR isn't saved, then we need
3755 two instructions for the return. */
3756 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3759 /* Can't be done if any of the VFP regs are pushed,
3760 since this also requires an insn. */
3761 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
3762 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3763 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
/* Same check for live call-saved iWMMXt registers.  */
3766 if (TARGET_REALLY_IWMMXT
)
3767 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3768 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3774 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3775 shrink-wrapping if possible. This is the case if we need to emit a
3776 prologue, which we can test by looking at the offsets. */
/* NOTE(review): the return-type line and braces are missing from this
   extraction -- confirm against upstream arm.c.  */
3778 use_simple_return_p (void)
3780 arm_stack_offsets
*offsets
;
3782 offsets
= arm_get_frame_offsets ();
/* A nonzero outgoing-args offset means a prologue is required.  */
3783 return offsets
->outgoing_args
!= 0;
3786 /* Return TRUE if int I is a valid immediate ARM constant. */
/* An ARM "modified immediate" is an 8-bit value rotated right by an
   even amount; Thumb-2 additionally allows byte-replicated patterns.
   NOTE(review): lines are missing from this extraction (return-type
   line, braces, the "return 1;"/"return 0;" lines, the lowbit and v
   declarations, and the Thumb-2 pattern computations) -- confirm
   against upstream arm.c.  */
3789 const_ok_for_arm (HOST_WIDE_INT i
)
3793 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3794 be all zero, or all one. */
3795 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3796 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3797 != ((~(unsigned HOST_WIDE_INT
) 0)
3798 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
/* Work on the low 32 bits only from here on.  */
3801 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3803 /* Fast return for 0 and small values. We must do this for zero, since
3804 the code below can't handle that one case. */
3805 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3808 /* Get the number of trailing zeros. */
3809 lowbit
= ffs((int) i
) - 1;
3811 /* Only even shifts are allowed in ARM mode so round down to the
3812 nearest even number. */
/* An 8-bit field shifted up by lowbit covers the value.  */
3816 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3821 /* Allow rotated constants in ARM mode. */
/* Masks for an 8-bit field that wraps around the top of the word.  */
3823 && ((i
& ~0xc000003f) == 0
3824 || (i
& ~0xf000000f) == 0
3825 || (i
& ~0xfc000003) == 0))
3832 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3835 if (i
== v
|| i
== (v
| (v
<< 8)))
3838 /* Allow repeated pattern 0xXY00XY00. */
3848 /* Return true if I is a valid constant for the operation CODE. */
/* When I is not directly encodable, tries the operation-specific
   alternative: negation for PLUS/MINUS, complement for SET/AND/IOR
   etc. (the switch head, case labels, and return lines between the
   cases are missing from this extraction, along with the return-type
   line and braces -- confirm against upstream arm.c).  */
3850 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3852 if (const_ok_for_arm (i
))
3858 /* See if we can use movw. */
3859 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
3862 /* Otherwise, try mvn. */
3863 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3866 /* See if we can use addw or subw. */
3868 && ((i
& 0xfffff000) == 0
3869 || ((-i
) & 0xfffff000) == 0))
3871 /* else fall through. */
/* PLUS (and fallthrough): try the negated constant.  */
3891 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3893 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
/* Remaining cases: try the complemented constant.  */
3899 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3903 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3910 /* Return true if I is a valid di mode constant for the operation CODE. */
/* Splits the 64-bit value into 32-bit halves and checks each half per
   CODE (the switch head and case labels are missing from this
   extraction, along with the return-type line and braces -- confirm
   against upstream arm.c).  */
3912 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
3914 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
3915 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
3916 rtx hi
= GEN_INT (hi_val
);
3917 rtx lo
= GEN_INT (lo_val
);
/* Each half must be encodable for CODE, or be all-ones (handled by
   a complement form).  */
3927 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
3928 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
/* PLUS case: high half via mvn-able operand, low half via add.  */
3930 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
3937 /* Emit a sequence of insns to handle a large constant.
3938 CODE is the code of the operation required, it can be any of SET, PLUS,
3939 IOR, AND, XOR, MINUS;
3940 MODE is the mode in which the operation is being performed;
3941 VAL is the integer to operate on;
3942 SOURCE is the other operand (a register, or a null-pointer for SET);
3943 SUBTARGETS means it is safe to create scratch registers if that will
3944 either produce a simpler sequence, or we will want to cse the values.
3945 Return value is the number of insns emitted. */
3947 /* ??? Tweak this for thumb2. */
/* NOTE(review): this extraction is missing lines (return-type line,
   braces, the cond declaration, the arm_gen_constant cost arguments,
   the SET/non-SET branch structure, and the final call's trailing
   argument) -- confirm against upstream arm.c before compiling.  */
3949 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
3950 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
/* Propagate any conditional-execution predicate from INSN.  */
3954 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
3955 cond
= COND_EXEC_TEST (PATTERN (insn
));
3959 if (subtargets
|| code
== SET
3960 || (REG_P (target
) && REG_P (source
)
3961 && REGNO (target
) != REGNO (source
)))
3963 /* After arm_reorg has been called, we can't fix up expensive
3964 constants by pushing them into memory so we must synthesize
3965 them in-line, regardless of the cost. This is only likely to
3966 be more costly on chips that have load delay slots and we are
3967 compiling without running the scheduler (so no splitting
3968 occurred before the final instruction emission).
3970 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
/* Dry-run cost check: compare the synthesized sequence length
   against the constant-pool cost limit.  */
3972 if (!cfun
->machine
->after_arm_reorg
3974 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
3976 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
3981 /* Currently SET is the only monadic value for CODE, all
3982 the rest are diadic. */
3983 if (TARGET_USE_MOVT
)
3984 arm_emit_movpair (target
, GEN_INT (val
));
3986 emit_set_insn (target
, GEN_INT (val
));
/* Diadic case: materialize VAL in a temporary, then apply CODE.  */
3992 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
3994 if (TARGET_USE_MOVT
)
3995 arm_emit_movpair (temp
, GEN_INT (val
));
3997 emit_set_insn (temp
, GEN_INT (val
));
3999 /* For MINUS, the value is subtracted from, since we never
4000 have subtraction of a constant. */
4002 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4004 emit_set_insn (target
,
4005 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
/* Fall back to emitting the synthesized sequence directly.  */
4011 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4015 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4016 ARM/THUMB2 immediates, and add up to VAL.
4017 Thr function return value gives the number of insns required. */
/* Scans VAL for its largest 2-bit-aligned run of zero bits to pick the
   best starting position, then compares the sequence produced from
   that position against one started at bit 0 and keeps the shorter.
   NOTE(review): lines are missing from this extraction (return-type
   line, braces, declarations of i/best_start/insns1/insns2, the
   TARGET_ARM guard on the scan loop, and the condition head before the
   second optimal_immediate_sequence_1 call) -- confirm upstream.  */
4019 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4020 struct four_ints
*return_sequence
)
4022 int best_consecutive_zeros
= 0;
4026 struct four_ints tmp_sequence
;
4028 /* If we aren't targeting ARM, the best place to start is always at
4029 the bottom, otherwise look more closely. */
/* Step through even bit positions looking for runs of zero pairs.  */
4032 for (i
= 0; i
< 32; i
+= 2)
4034 int consecutive_zeros
= 0;
4036 if (!(val
& (3 << i
)))
4038 while ((i
< 32) && !(val
& (3 << i
)))
4040 consecutive_zeros
+= 2;
4043 if (consecutive_zeros
> best_consecutive_zeros
)
4045 best_consecutive_zeros
= consecutive_zeros
;
4046 best_start
= i
- consecutive_zeros
;
4053 /* So long as it won't require any more insns to do so, it's
4054 desirable to emit a small constant (in bits 0...9) in the last
4055 insn. This way there is more chance that it can be combined with
4056 a later addressing insn to form a pre-indexed load or store
4057 operation. Consider:
4059 *((volatile int *)0xe0000100) = 1;
4060 *((volatile int *)0xe0000110) = 2;
4062 We want this to wind up as:
4066 str rB, [rA, #0x100]
4068 str rB, [rA, #0x110]
4070 rather than having to synthesize both large constants from scratch.
4072 Therefore, we calculate how many insns would be required to emit
4073 the constant starting from `best_start', and also starting from
4074 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4075 yield a shorter sequence, we may as well use zero. */
4076 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4078 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < val
))
4080 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
/* Ties favor the bit-0 start (see the combining rationale above).  */
4081 if (insns2
<= insns1
)
4083 *return_sequence
= tmp_sequence
;
4091 /* As for optimal_immediate_sequence, but starting at bit-position I. */
/* Greedily peels encodable immediates off VAL, storing each into
   RETURN_SEQUENCE->i[] until nothing remains; on Thumb-2 it also
   considers 32-bit and 16-bit byte-replicated constants when they beat
   plain 8-bit rotated immediates.
   NOTE(review): many lines are missing from this extraction (return
   type, braces, the insns counter declaration/initialization, the
   outer do/while loop, the `end' computation, several result
   assignments, and the final return) -- confirm upstream.  */
4093 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4094 struct four_ints
*return_sequence
, int i
)
4096 int remainder
= val
& 0xffffffff;
4099 /* Try and find a way of doing the job in either two or three
4102 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4103 location. We start at position I. This may be the MSB, or
4104 optimial_immediate_sequence may have positioned it at the largest block
4105 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4106 wrapping around to the top of the word when we drop off the bottom.
4107 In the worst case this code should produce no more than four insns.
4109 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4110 constants, shifted to any arbitrary location. We should always start
4115 unsigned int b1
, b2
, b3
, b4
;
4116 unsigned HOST_WIDE_INT result
;
/* A four_ints holds at most four immediates; more would mean the
   caller should use a load from the constant pool instead.  */
4119 gcc_assert (insns
< 4);
4124 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4125 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4128 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4129 /* We can use addw/subw for the last 12 bits. */
4133 /* Use an 8-bit shifted/rotated immediate. */
/* Mask in an 8-bit window ending at `end', wrapping past bit 0 in
   ARM mode.  */
4137 result
= remainder
& ((0x0ff << end
)
4138 | ((i
< end
) ? (0xff >> (32 - end
))
4145 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4146 arbitrary shifts. */
4147 i
-= TARGET_ARM
? 2 : 1;
4151 /* Next, see if we can do a better job with a thumb2 replicated
4154 We do it this way around to catch the cases like 0x01F001E0 where
4155 two 8-bit immediates would work, but a replicated constant would
4158 TODO: 16-bit constants that don't clear all the bits, but still win.
4159 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
/* Split the remainder into its four bytes, b1 most significant.  */
4162 b1
= (remainder
& 0xff000000) >> 24;
4163 b2
= (remainder
& 0x00ff0000) >> 16;
4164 b3
= (remainder
& 0x0000ff00) >> 8;
4165 b4
= remainder
& 0xff;
4169 /* The 8-bit immediate already found clears b1 (and maybe b2),
4170 but must leave b3 and b4 alone. */
4172 /* First try to find a 32-bit replicated constant that clears
4173 almost everything. We can assume that we can't do it in one,
4174 or else we wouldn't be here. */
/* tmp: bits common to all four bytes; tmp2: that byte replicated
   across the word (the "+ (tmp << 24)" term is missing from this
   view).  */
4175 unsigned int tmp
= b1
& b2
& b3
& b4
;
4176 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4178 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4179 + (tmp
== b3
) + (tmp
== b4
);
4181 && (matching_bytes
>= 3
4182 || (matching_bytes
== 2
4183 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4185 /* At least 3 of the bytes match, and the fourth has at
4186 least as many bits set, or two of the bytes match
4187 and it will only require one more insn to finish. */
4195 /* Second, try to find a 16-bit replicated constant that can
4196 leave three of the bytes clear. If b2 or b4 is already
4197 zero, then we can. If the 8-bit from above would not
4198 clear b2 anyway, then we still win. */
4199 else if (b1
== b3
&& (!b2
|| !b4
4200 || (remainder
& 0x00ff0000 & ~result
)))
4202 result
= remainder
& 0xff00ff00;
4208 /* The 8-bit immediate already found clears b2 (and maybe b3)
4209 and we don't get here unless b1 is alredy clear, but it will
4210 leave b4 unchanged. */
4212 /* If we can clear b2 and b4 at once, then we win, since the
4213 8-bits couldn't possibly reach that far. */
4216 result
= remainder
& 0x00ff00ff;
/* Record the chosen immediate and strip its bits from the
   remainder.  */
4222 return_sequence
->i
[insns
++] = result
;
4223 remainder
&= ~result
;
/* For SET/MINUS, presumably subsequent insns switch operation (e.g.
   to IOR) -- the assignment under this test is missing from this
   view.  */
4225 if (code
== SET
|| code
== MINUS
)
4233 /* Emit an instruction with the indicated PATTERN. If COND is
4234 non-NULL, conditionalize the execution of the instruction on COND
4238 emit_constant_insn (rtx cond
, rtx pattern
)
4241 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4242 emit_insn (pattern
);
4245 /* As above, but extra parameter GENERATE which, if clear, suppresses
4249 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4250 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4251 int subtargets
, int generate
)
4255 int final_invert
= 0;
4257 int set_sign_bit_copies
= 0;
4258 int clear_sign_bit_copies
= 0;
4259 int clear_zero_bit_copies
= 0;
4260 int set_zero_bit_copies
= 0;
4261 int insns
= 0, neg_insns
, inv_insns
;
4262 unsigned HOST_WIDE_INT temp1
, temp2
;
4263 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4264 struct four_ints
*immediates
;
4265 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4267 /* Find out which operations are safe for a given CODE. Also do a quick
4268 check for degenerate cases; these can occur when DImode operations
4281 if (remainder
== 0xffffffff)
4284 emit_constant_insn (cond
,
4285 gen_rtx_SET (target
,
4286 GEN_INT (ARM_SIGN_EXTEND (val
))));
4292 if (reload_completed
&& rtx_equal_p (target
, source
))
4296 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4305 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4308 if (remainder
== 0xffffffff)
4310 if (reload_completed
&& rtx_equal_p (target
, source
))
4313 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4322 if (reload_completed
&& rtx_equal_p (target
, source
))
4325 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4329 if (remainder
== 0xffffffff)
4332 emit_constant_insn (cond
,
4333 gen_rtx_SET (target
,
4334 gen_rtx_NOT (mode
, source
)));
4341 /* We treat MINUS as (val - source), since (source - val) is always
4342 passed as (source + (-val)). */
4346 emit_constant_insn (cond
,
4347 gen_rtx_SET (target
,
4348 gen_rtx_NEG (mode
, source
)));
4351 if (const_ok_for_arm (val
))
4354 emit_constant_insn (cond
,
4355 gen_rtx_SET (target
,
4356 gen_rtx_MINUS (mode
, GEN_INT (val
),
4367 /* If we can do it in one insn get out quickly. */
4368 if (const_ok_for_op (val
, code
))
4371 emit_constant_insn (cond
,
4372 gen_rtx_SET (target
,
4374 ? gen_rtx_fmt_ee (code
, mode
, source
,
4380 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4382 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4383 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4387 if (mode
== SImode
&& i
== 16)
4388 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4390 emit_constant_insn (cond
,
4391 gen_zero_extendhisi2
4392 (target
, gen_lowpart (HImode
, source
)));
4394 /* Extz only supports SImode, but we can coerce the operands
4396 emit_constant_insn (cond
,
4397 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4398 gen_lowpart (SImode
, source
),
4399 GEN_INT (i
), const0_rtx
));
4405 /* Calculate a few attributes that may be useful for specific
4407 /* Count number of leading zeros. */
4408 for (i
= 31; i
>= 0; i
--)
4410 if ((remainder
& (1 << i
)) == 0)
4411 clear_sign_bit_copies
++;
4416 /* Count number of leading 1's. */
4417 for (i
= 31; i
>= 0; i
--)
4419 if ((remainder
& (1 << i
)) != 0)
4420 set_sign_bit_copies
++;
4425 /* Count number of trailing zero's. */
4426 for (i
= 0; i
<= 31; i
++)
4428 if ((remainder
& (1 << i
)) == 0)
4429 clear_zero_bit_copies
++;
4434 /* Count number of trailing 1's. */
4435 for (i
= 0; i
<= 31; i
++)
4437 if ((remainder
& (1 << i
)) != 0)
4438 set_zero_bit_copies
++;
4446 /* See if we can do this by sign_extending a constant that is known
4447 to be negative. This is a good, way of doing it, since the shift
4448 may well merge into a subsequent insn. */
4449 if (set_sign_bit_copies
> 1)
4451 if (const_ok_for_arm
4452 (temp1
= ARM_SIGN_EXTEND (remainder
4453 << (set_sign_bit_copies
- 1))))
4457 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4458 emit_constant_insn (cond
,
4459 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4460 emit_constant_insn (cond
,
4461 gen_ashrsi3 (target
, new_src
,
4462 GEN_INT (set_sign_bit_copies
- 1)));
4466 /* For an inverted constant, we will need to set the low bits,
4467 these will be shifted out of harm's way. */
4468 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4469 if (const_ok_for_arm (~temp1
))
4473 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4474 emit_constant_insn (cond
,
4475 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4476 emit_constant_insn (cond
,
4477 gen_ashrsi3 (target
, new_src
,
4478 GEN_INT (set_sign_bit_copies
- 1)));
4484 /* See if we can calculate the value as the difference between two
4485 valid immediates. */
4486 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4488 int topshift
= clear_sign_bit_copies
& ~1;
4490 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4491 & (0xff000000 >> topshift
));
4493 /* If temp1 is zero, then that means the 9 most significant
4494 bits of remainder were 1 and we've caused it to overflow.
4495 When topshift is 0 we don't need to do anything since we
4496 can borrow from 'bit 32'. */
4497 if (temp1
== 0 && topshift
!= 0)
4498 temp1
= 0x80000000 >> (topshift
- 1);
4500 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4502 if (const_ok_for_arm (temp2
))
4506 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4507 emit_constant_insn (cond
,
4508 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4509 emit_constant_insn (cond
,
4510 gen_addsi3 (target
, new_src
,
4518 /* See if we can generate this by setting the bottom (or the top)
4519 16 bits, and then shifting these into the other half of the
4520 word. We only look for the simplest cases, to do more would cost
4521 too much. Be careful, however, not to generate this when the
4522 alternative would take fewer insns. */
4523 if (val
& 0xffff0000)
4525 temp1
= remainder
& 0xffff0000;
4526 temp2
= remainder
& 0x0000ffff;
4528 /* Overlaps outside this range are best done using other methods. */
4529 for (i
= 9; i
< 24; i
++)
4531 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4532 && !const_ok_for_arm (temp2
))
4534 rtx new_src
= (subtargets
4535 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4537 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4538 source
, subtargets
, generate
);
4546 gen_rtx_ASHIFT (mode
, source
,
4553 /* Don't duplicate cases already considered. */
4554 for (i
= 17; i
< 24; i
++)
4556 if (((temp1
| (temp1
>> i
)) == remainder
)
4557 && !const_ok_for_arm (temp1
))
4559 rtx new_src
= (subtargets
4560 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4562 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4563 source
, subtargets
, generate
);
4568 gen_rtx_SET (target
,
4571 gen_rtx_LSHIFTRT (mode
, source
,
4582 /* If we have IOR or XOR, and the constant can be loaded in a
4583 single instruction, and we can find a temporary to put it in,
4584 then this can be done in two instructions instead of 3-4. */
4586 /* TARGET can't be NULL if SUBTARGETS is 0 */
4587 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4589 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4593 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4595 emit_constant_insn (cond
,
4596 gen_rtx_SET (sub
, GEN_INT (val
)));
4597 emit_constant_insn (cond
,
4598 gen_rtx_SET (target
,
4599 gen_rtx_fmt_ee (code
, mode
,
4610 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4611 and the remainder 0s for e.g. 0xfff00000)
4612 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4614 This can be done in 2 instructions by using shifts with mov or mvn.
4619 mvn r0, r0, lsr #12 */
4620 if (set_sign_bit_copies
> 8
4621 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4625 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4626 rtx shift
= GEN_INT (set_sign_bit_copies
);
4632 gen_rtx_ASHIFT (mode
,
4637 gen_rtx_SET (target
,
4639 gen_rtx_LSHIFTRT (mode
, sub
,
4646 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4648 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4650 For eg. r0 = r0 | 0xfff
4655 if (set_zero_bit_copies
> 8
4656 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4660 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4661 rtx shift
= GEN_INT (set_zero_bit_copies
);
4667 gen_rtx_LSHIFTRT (mode
,
4672 gen_rtx_SET (target
,
4674 gen_rtx_ASHIFT (mode
, sub
,
4680 /* This will never be reached for Thumb2 because orn is a valid
4681 instruction. This is for Thumb1 and the ARM 32 bit cases.
4683 x = y | constant (such that ~constant is a valid constant)
4685 x = ~(~y & ~constant).
4687 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4691 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4692 emit_constant_insn (cond
,
4694 gen_rtx_NOT (mode
, source
)));
4697 sub
= gen_reg_rtx (mode
);
4698 emit_constant_insn (cond
,
4700 gen_rtx_AND (mode
, source
,
4702 emit_constant_insn (cond
,
4703 gen_rtx_SET (target
,
4704 gen_rtx_NOT (mode
, sub
)));
4711 /* See if two shifts will do 2 or more insn's worth of work. */
4712 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4714 HOST_WIDE_INT shift_mask
= ((0xffffffff
4715 << (32 - clear_sign_bit_copies
))
4718 if ((remainder
| shift_mask
) != 0xffffffff)
4720 HOST_WIDE_INT new_val
4721 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4725 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4726 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4727 new_src
, source
, subtargets
, 1);
4732 rtx targ
= subtargets
? NULL_RTX
: target
;
4733 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4734 targ
, source
, subtargets
, 0);
4740 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4741 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4743 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4744 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4750 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4752 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4754 if ((remainder
| shift_mask
) != 0xffffffff)
4756 HOST_WIDE_INT new_val
4757 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4760 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4762 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4763 new_src
, source
, subtargets
, 1);
4768 rtx targ
= subtargets
? NULL_RTX
: target
;
4770 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4771 targ
, source
, subtargets
, 0);
4777 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4778 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4780 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4781 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4793 /* Calculate what the instruction sequences would be if we generated it
4794 normally, negated, or inverted. */
4796 /* AND cannot be split into multiple insns, so invert and use BIC. */
4799 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4802 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4807 if (can_invert
|| final_invert
)
4808 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4813 immediates
= &pos_immediates
;
4815 /* Is the negated immediate sequence more efficient? */
4816 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4819 immediates
= &neg_immediates
;
4824 /* Is the inverted immediate sequence more efficient?
4825 We must allow for an extra NOT instruction for XOR operations, although
4826 there is some chance that the final 'mvn' will get optimized later. */
4827 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4830 immediates
= &inv_immediates
;
4838 /* Now output the chosen sequence as instructions. */
4841 for (i
= 0; i
< insns
; i
++)
4843 rtx new_src
, temp1_rtx
;
4845 temp1
= immediates
->i
[i
];
4847 if (code
== SET
|| code
== MINUS
)
4848 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4849 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4850 new_src
= gen_reg_rtx (mode
);
4856 else if (can_negate
)
4859 temp1
= trunc_int_for_mode (temp1
, mode
);
4860 temp1_rtx
= GEN_INT (temp1
);
4864 else if (code
== MINUS
)
4865 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4867 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4869 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
4874 can_negate
= can_invert
;
4878 else if (code
== MINUS
)
4886 emit_constant_insn (cond
, gen_rtx_SET (target
,
4887 gen_rtx_NOT (mode
, source
)));
4894 /* Canonicalize a comparison so that we are more likely to recognize it.
4895 This can be done for a few constant compares, where we can make the
4896 immediate value easier to load. */
4899 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4900 bool op0_preserve_value
)
4903 unsigned HOST_WIDE_INT i
, maxval
;
4905 mode
= GET_MODE (*op0
);
4906 if (mode
== VOIDmode
)
4907 mode
= GET_MODE (*op1
);
4909 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
4911 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4912 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4913 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4914 for GTU/LEU in Thumb mode. */
4918 if (*code
== GT
|| *code
== LE
4919 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
4921 /* Missing comparison. First try to use an available
4923 if (CONST_INT_P (*op1
))
4931 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4933 *op1
= GEN_INT (i
+ 1);
4934 *code
= *code
== GT
? GE
: LT
;
4940 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
4941 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
4943 *op1
= GEN_INT (i
+ 1);
4944 *code
= *code
== GTU
? GEU
: LTU
;
4953 /* If that did not work, reverse the condition. */
4954 if (!op0_preserve_value
)
4956 std::swap (*op0
, *op1
);
4957 *code
= (int)swap_condition ((enum rtx_code
)*code
);
4963 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4964 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4965 to facilitate possible combining with a cmp into 'ands'. */
4967 && GET_CODE (*op0
) == ZERO_EXTEND
4968 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
4969 && GET_MODE (XEXP (*op0
, 0)) == QImode
4970 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
4971 && subreg_lowpart_p (XEXP (*op0
, 0))
4972 && *op1
== const0_rtx
)
4973 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
4976 /* Comparisons smaller than DImode. Only adjust comparisons against
4977 an out-of-range constant. */
4978 if (!CONST_INT_P (*op1
)
4979 || const_ok_for_arm (INTVAL (*op1
))
4980 || const_ok_for_arm (- INTVAL (*op1
)))
4994 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
4996 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
4997 *code
= *code
== GT
? GE
: LT
;
5005 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5007 *op1
= GEN_INT (i
- 1);
5008 *code
= *code
== GE
? GT
: LE
;
5015 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5016 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5018 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5019 *code
= *code
== GTU
? GEU
: LTU
;
5027 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5029 *op1
= GEN_INT (i
- 1);
5030 *code
= *code
== GEU
? GTU
: LEU
;
5041 /* Define how to find the value returned by a function. */
5044 arm_function_value(const_tree type
, const_tree func
,
5045 bool outgoing ATTRIBUTE_UNUSED
)
5048 int unsignedp ATTRIBUTE_UNUSED
;
5049 rtx r ATTRIBUTE_UNUSED
;
5051 mode
= TYPE_MODE (type
);
5053 if (TARGET_AAPCS_BASED
)
5054 return aapcs_allocate_return_reg (mode
, type
, func
);
5056 /* Promote integer types. */
5057 if (INTEGRAL_TYPE_P (type
))
5058 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5060 /* Promotes small structs returned in a register to full-word size
5061 for big-endian AAPCS. */
5062 if (arm_return_in_msb (type
))
5064 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5065 if (size
% UNITS_PER_WORD
!= 0)
5067 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5068 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5072 return arm_libcall_value_1 (mode
);
5075 /* libcall hashtable helpers. */
5077 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5079 static inline hashval_t
hash (const rtx_def
*);
5080 static inline bool equal (const rtx_def
*, const rtx_def
*);
5081 static inline void remove (rtx_def
*);
5085 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5087 return rtx_equal_p (p1
, p2
);
5091 libcall_hasher::hash (const rtx_def
*p1
)
5093 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5096 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5099 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5101 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5105 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5107 static bool init_done
= false;
5108 static libcall_table_type
*libcall_htab
= NULL
;
5114 libcall_htab
= new libcall_table_type (31);
5115 add_libcall (libcall_htab
,
5116 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5117 add_libcall (libcall_htab
,
5118 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5119 add_libcall (libcall_htab
,
5120 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5121 add_libcall (libcall_htab
,
5122 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5124 add_libcall (libcall_htab
,
5125 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5126 add_libcall (libcall_htab
,
5127 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5128 add_libcall (libcall_htab
,
5129 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5130 add_libcall (libcall_htab
,
5131 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5133 add_libcall (libcall_htab
,
5134 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5135 add_libcall (libcall_htab
,
5136 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5137 add_libcall (libcall_htab
,
5138 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5139 add_libcall (libcall_htab
,
5140 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5141 add_libcall (libcall_htab
,
5142 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5143 add_libcall (libcall_htab
,
5144 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5145 add_libcall (libcall_htab
,
5146 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5147 add_libcall (libcall_htab
,
5148 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5150 /* Values from double-precision helper functions are returned in core
5151 registers if the selected core only supports single-precision
5152 arithmetic, even if we are using the hard-float ABI. The same is
5153 true for single-precision helpers, but we will never be using the
5154 hard-float ABI on a CPU which doesn't support single-precision
5155 operations in hardware. */
5156 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5157 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5158 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5159 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5160 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5161 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5162 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5163 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5164 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5165 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5166 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5167 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5169 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5173 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5177 arm_libcall_value_1 (machine_mode mode
)
5179 if (TARGET_AAPCS_BASED
)
5180 return aapcs_libcall_value (mode
);
5181 else if (TARGET_IWMMXT_ABI
5182 && arm_vector_mode_supported_p (mode
))
5183 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5185 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5188 /* Define how to find the value returned by a library function
5189 assuming the value has mode MODE. */
5192 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5194 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5195 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5197 /* The following libcalls return their result in integer registers,
5198 even though they return a floating point value. */
5199 if (arm_libcall_uses_aapcs_base (libcall
))
5200 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5204 return arm_libcall_value_1 (mode
);
5207 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5210 arm_function_value_regno_p (const unsigned int regno
)
5212 if (regno
== ARG_REGISTER (1)
5214 && TARGET_AAPCS_BASED
5216 && TARGET_HARD_FLOAT
5217 && regno
== FIRST_VFP_REGNUM
)
5218 || (TARGET_IWMMXT_ABI
5219 && regno
== FIRST_IWMMXT_REGNUM
))
5225 /* Determine the amount of memory needed to store the possible return
5226 registers of an untyped call. */
5228 arm_apply_result_size (void)
5234 if (TARGET_HARD_FLOAT_ABI
&& TARGET_VFP
)
5236 if (TARGET_IWMMXT_ABI
)
5243 /* Decide whether TYPE should be returned in memory (true)
5244 or in a register (false). FNTYPE is the type of the function making
5247 arm_return_in_memory (const_tree type
, const_tree fntype
)
5251 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5253 if (TARGET_AAPCS_BASED
)
5255 /* Simple, non-aggregate types (ie not including vectors and
5256 complex) are always returned in a register (or registers).
5257 We don't care about which register here, so we can short-cut
5258 some of the detail. */
5259 if (!AGGREGATE_TYPE_P (type
)
5260 && TREE_CODE (type
) != VECTOR_TYPE
5261 && TREE_CODE (type
) != COMPLEX_TYPE
)
5264 /* Any return value that is no larger than one word can be
5266 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5269 /* Check any available co-processors to see if they accept the
5270 type as a register candidate (VFP, for example, can return
5271 some aggregates in consecutive registers). These aren't
5272 available if the call is variadic. */
5273 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5276 /* Vector values should be returned using ARM registers, not
5277 memory (unless they're over 16 bytes, which will break since
5278 we only have four call-clobbered registers to play with). */
5279 if (TREE_CODE (type
) == VECTOR_TYPE
)
5280 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5282 /* The rest go in memory. */
5286 if (TREE_CODE (type
) == VECTOR_TYPE
)
5287 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5289 if (!AGGREGATE_TYPE_P (type
) &&
5290 (TREE_CODE (type
) != VECTOR_TYPE
))
5291 /* All simple types are returned in registers. */
5294 if (arm_abi
!= ARM_ABI_APCS
)
5296 /* ATPCS and later return aggregate types in memory only if they are
5297 larger than a word (or are variable size). */
5298 return (size
< 0 || size
> UNITS_PER_WORD
);
5301 /* For the arm-wince targets we choose to be compatible with Microsoft's
5302 ARM and Thumb compilers, which always return aggregates in memory. */
5304 /* All structures/unions bigger than one word are returned in memory.
5305 Also catch the case where int_size_in_bytes returns -1. In this case
5306 the aggregate is either huge or of variable size, and in either case
5307 we will want to return it via memory and not in a register. */
5308 if (size
< 0 || size
> UNITS_PER_WORD
)
5311 if (TREE_CODE (type
) == RECORD_TYPE
)
5315 /* For a struct the APCS says that we only return in a register
5316 if the type is 'integer like' and every addressable element
5317 has an offset of zero. For practical purposes this means
5318 that the structure can have at most one non bit-field element
5319 and that this element must be the first one in the structure. */
5321 /* Find the first field, ignoring non FIELD_DECL things which will
5322 have been created by C++. */
5323 for (field
= TYPE_FIELDS (type
);
5324 field
&& TREE_CODE (field
) != FIELD_DECL
;
5325 field
= DECL_CHAIN (field
))
5329 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5331 /* Check that the first field is valid for returning in a register. */
5333 /* ... Floats are not allowed */
5334 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5337 /* ... Aggregates that are not themselves valid for returning in
5338 a register are not allowed. */
5339 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5342 /* Now check the remaining fields, if any. Only bitfields are allowed,
5343 since they are not addressable. */
5344 for (field
= DECL_CHAIN (field
);
5346 field
= DECL_CHAIN (field
))
5348 if (TREE_CODE (field
) != FIELD_DECL
)
5351 if (!DECL_BIT_FIELD_TYPE (field
))
5358 if (TREE_CODE (type
) == UNION_TYPE
)
5362 /* Unions can be returned in registers if every element is
5363 integral, or can be returned in an integer register. */
5364 for (field
= TYPE_FIELDS (type
);
5366 field
= DECL_CHAIN (field
))
5368 if (TREE_CODE (field
) != FIELD_DECL
)
5371 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5374 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5380 #endif /* not ARM_WINCE */
5382 /* Return all other types in memory. */
5386 const struct pcs_attribute_arg
5390 } pcs_attribute_args
[] =
5392 {"aapcs", ARM_PCS_AAPCS
},
5393 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5395 /* We could recognize these, but changes would be needed elsewhere
5396 * to implement them. */
5397 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5398 {"atpcs", ARM_PCS_ATPCS
},
5399 {"apcs", ARM_PCS_APCS
},
5401 {NULL
, ARM_PCS_UNKNOWN
}
5405 arm_pcs_from_attribute (tree attr
)
5407 const struct pcs_attribute_arg
*ptr
;
5410 /* Get the value of the argument. */
5411 if (TREE_VALUE (attr
) == NULL_TREE
5412 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5413 return ARM_PCS_UNKNOWN
;
5415 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5417 /* Check it against the list of known arguments. */
5418 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5419 if (streq (arg
, ptr
->arg
))
5422 /* An unrecognized interrupt type. */
5423 return ARM_PCS_UNKNOWN
;
5426 /* Get the PCS variant to use for this call. TYPE is the function's type
5427 specification, DECL is the specific declartion. DECL may be null if
5428 the call could be indirect or if this is a library call. */
5430 arm_get_pcs_model (const_tree type
, const_tree decl
)
5432 bool user_convention
= false;
5433 enum arm_pcs user_pcs
= arm_pcs_default
;
5438 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5441 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5442 user_convention
= true;
5445 if (TARGET_AAPCS_BASED
)
5447 /* Detect varargs functions. These always use the base rules
5448 (no argument is ever a candidate for a co-processor
5450 bool base_rules
= stdarg_p (type
);
5452 if (user_convention
)
5454 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5455 sorry ("non-AAPCS derived PCS variant");
5456 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5457 error ("variadic functions must use the base AAPCS variant");
5461 return ARM_PCS_AAPCS
;
5462 else if (user_convention
)
5464 else if (decl
&& flag_unit_at_a_time
)
5466 /* Local functions never leak outside this compilation unit,
5467 so we are free to use whatever conventions are
5469 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5470 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5472 return ARM_PCS_AAPCS_LOCAL
;
5475 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5476 sorry ("PCS variant");
5478 /* For everything else we use the target's default. */
5479 return arm_pcs_default
;
5484 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5485 const_tree fntype ATTRIBUTE_UNUSED
,
5486 rtx libcall ATTRIBUTE_UNUSED
,
5487 const_tree fndecl ATTRIBUTE_UNUSED
)
5489 /* Record the unallocated VFP registers. */
5490 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5491 pcum
->aapcs_vfp_reg_alloc
= 0;
5494 /* Walk down the type tree of TYPE counting consecutive base elements.
5495 If *MODEP is VOIDmode, then set it to the first valid floating point
5496 type. If a non-floating point type is found, or if a floating point
5497 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5498 otherwise return the count in the sub-tree. */
5500 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5505 switch (TREE_CODE (type
))
5508 mode
= TYPE_MODE (type
);
5509 if (mode
!= DFmode
&& mode
!= SFmode
)
5512 if (*modep
== VOIDmode
)
5521 mode
= TYPE_MODE (TREE_TYPE (type
));
5522 if (mode
!= DFmode
&& mode
!= SFmode
)
5525 if (*modep
== VOIDmode
)
5534 /* Use V2SImode and V4SImode as representatives of all 64-bit
5535 and 128-bit vector types, whether or not those modes are
5536 supported with the present options. */
5537 size
= int_size_in_bytes (type
);
5550 if (*modep
== VOIDmode
)
5553 /* Vector modes are considered to be opaque: two vectors are
5554 equivalent for the purposes of being homogeneous aggregates
5555 if they are the same size. */
5564 tree index
= TYPE_DOMAIN (type
);
5566 /* Can't handle incomplete types nor sizes that are not
5568 if (!COMPLETE_TYPE_P (type
)
5569 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5572 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5575 || !TYPE_MAX_VALUE (index
)
5576 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5577 || !TYPE_MIN_VALUE (index
)
5578 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5582 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5583 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5585 /* There must be no padding. */
5586 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5598 /* Can't handle incomplete types nor sizes that are not
5600 if (!COMPLETE_TYPE_P (type
)
5601 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5604 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5606 if (TREE_CODE (field
) != FIELD_DECL
)
5609 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5615 /* There must be no padding. */
5616 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5623 case QUAL_UNION_TYPE
:
5625 /* These aren't very interesting except in a degenerate case. */
5630 /* Can't handle incomplete types nor sizes that are not
5632 if (!COMPLETE_TYPE_P (type
)
5633 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5636 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5638 if (TREE_CODE (field
) != FIELD_DECL
)
5641 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5644 count
= count
> sub_count
? count
: sub_count
;
5647 /* There must be no padding. */
5648 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5661 /* Return true if PCS_VARIANT should use VFP registers. */
5663 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5665 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5667 static bool seen_thumb1_vfp
= false;
5669 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5671 sorry ("Thumb-1 hard-float VFP ABI");
5672 /* sorry() is not immediately fatal, so only display this once. */
5673 seen_thumb1_vfp
= true;
5679 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5682 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
5683 (TARGET_VFP_DOUBLE
|| !is_double
));
5686 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5687 suitable for passing or returning in VFP registers for the PCS
5688 variant selected. If it is, then *BASE_MODE is updated to contain
5689 a machine mode describing each element of the argument's type and
5690 *COUNT to hold the number of such elements. */
5692 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5693 machine_mode mode
, const_tree type
,
5694 machine_mode
*base_mode
, int *count
)
5696 machine_mode new_mode
= VOIDmode
;
5698 /* If we have the type information, prefer that to working things
5699 out from the mode. */
5702 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5704 if (ag_count
> 0 && ag_count
<= 4)
5709 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5710 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5711 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5716 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5719 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5725 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5728 *base_mode
= new_mode
;
5733 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5734 machine_mode mode
, const_tree type
)
5736 int count ATTRIBUTE_UNUSED
;
5737 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5739 if (!use_vfp_abi (pcs_variant
, false))
5741 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5746 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5749 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5752 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5753 &pcum
->aapcs_vfp_rmode
,
5754 &pcum
->aapcs_vfp_rcount
);
5758 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5759 const_tree type ATTRIBUTE_UNUSED
)
5761 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
5762 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5765 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5766 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5768 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5770 || (mode
== TImode
&& ! TARGET_NEON
)
5771 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5774 int rcount
= pcum
->aapcs_vfp_rcount
;
5776 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5780 /* Avoid using unsupported vector modes. */
5781 if (rmode
== V2SImode
)
5783 else if (rmode
== V4SImode
)
5790 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5791 for (i
= 0; i
< rcount
; i
++)
5793 rtx tmp
= gen_rtx_REG (rmode
,
5794 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5795 tmp
= gen_rtx_EXPR_LIST
5797 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5798 XVECEXP (par
, 0, i
) = tmp
;
5801 pcum
->aapcs_reg
= par
;
5804 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5811 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5813 const_tree type ATTRIBUTE_UNUSED
)
5815 if (!use_vfp_abi (pcs_variant
, false))
5818 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
5821 machine_mode ag_mode
;
5826 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5831 if (ag_mode
== V2SImode
)
5833 else if (ag_mode
== V4SImode
)
5839 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5840 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5841 for (i
= 0; i
< count
; i
++)
5843 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5844 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5845 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5846 XVECEXP (par
, 0, i
) = tmp
;
5852 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5856 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5857 machine_mode mode ATTRIBUTE_UNUSED
,
5858 const_tree type ATTRIBUTE_UNUSED
)
5860 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5861 pcum
->aapcs_vfp_reg_alloc
= 0;
5865 #define AAPCS_CP(X) \
5867 aapcs_ ## X ## _cum_init, \
5868 aapcs_ ## X ## _is_call_candidate, \
5869 aapcs_ ## X ## _allocate, \
5870 aapcs_ ## X ## _is_return_candidate, \
5871 aapcs_ ## X ## _allocate_return_reg, \
5872 aapcs_ ## X ## _advance \
5875 /* Table of co-processors that can be used to pass arguments in
5876 registers. Idealy no arugment should be a candidate for more than
5877 one co-processor table entry, but the table is processed in order
5878 and stops after the first match. If that entry then fails to put
5879 the argument into a co-processor register, the argument will go on
5883 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5884 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5886 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5887 BLKmode) is a candidate for this co-processor's registers; this
5888 function should ignore any position-dependent state in
5889 CUMULATIVE_ARGS and only use call-type dependent information. */
5890 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5892 /* Return true if the argument does get a co-processor register; it
5893 should set aapcs_reg to an RTX of the register allocated as is
5894 required for a return from FUNCTION_ARG. */
5895 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5897 /* Return true if a result of mode MODE (or type TYPE if MODE is
5898 BLKmode) is can be returned in this co-processor's registers. */
5899 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
5901 /* Allocate and return an RTX element to hold the return type of a
5902 call, this routine must not fail and will only be called if
5903 is_return_candidate returned true with the same parameters. */
5904 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
5906 /* Finish processing this argument and prepare to start processing
5908 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5909 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
5917 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5922 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5923 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
5930 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
5932 /* We aren't passed a decl, so we can't check that a call is local.
5933 However, it isn't clear that that would be a win anyway, since it
5934 might limit some tail-calling opportunities. */
5935 enum arm_pcs pcs_variant
;
5939 const_tree fndecl
= NULL_TREE
;
5941 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5944 fntype
= TREE_TYPE (fntype
);
5947 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5950 pcs_variant
= arm_pcs_default
;
5952 if (pcs_variant
!= ARM_PCS_AAPCS
)
5956 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5957 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
5966 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
5969 /* We aren't passed a decl, so we can't check that a call is local.
5970 However, it isn't clear that that would be a win anyway, since it
5971 might limit some tail-calling opportunities. */
5972 enum arm_pcs pcs_variant
;
5973 int unsignedp ATTRIBUTE_UNUSED
;
5977 const_tree fndecl
= NULL_TREE
;
5979 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
5982 fntype
= TREE_TYPE (fntype
);
5985 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
5988 pcs_variant
= arm_pcs_default
;
5990 /* Promote integer types. */
5991 if (type
&& INTEGRAL_TYPE_P (type
))
5992 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
5994 if (pcs_variant
!= ARM_PCS_AAPCS
)
5998 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
5999 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6001 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6005 /* Promotes small structs returned in a register to full-word size
6006 for big-endian AAPCS. */
6007 if (type
&& arm_return_in_msb (type
))
6009 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6010 if (size
% UNITS_PER_WORD
!= 0)
6012 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6013 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
6017 return gen_rtx_REG (mode
, R0_REGNUM
);
6021 aapcs_libcall_value (machine_mode mode
)
6023 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6024 && GET_MODE_SIZE (mode
) <= 4)
6027 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6030 /* Lay out a function argument using the AAPCS rules. The rule
6031 numbers referred to here are those in the AAPCS. */
6033 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6034 const_tree type
, bool named
)
6039 /* We only need to do this once per argument. */
6040 if (pcum
->aapcs_arg_processed
)
6043 pcum
->aapcs_arg_processed
= true;
6045 /* Special case: if named is false then we are handling an incoming
6046 anonymous argument which is on the stack. */
6050 /* Is this a potential co-processor register candidate? */
6051 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6053 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6054 pcum
->aapcs_cprc_slot
= slot
;
6056 /* We don't have to apply any of the rules from part B of the
6057 preparation phase, these are handled elsewhere in the
6062 /* A Co-processor register candidate goes either in its own
6063 class of registers or on the stack. */
6064 if (!pcum
->aapcs_cprc_failed
[slot
])
6066 /* C1.cp - Try to allocate the argument to co-processor
6068 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6071 /* C2.cp - Put the argument on the stack and note that we
6072 can't assign any more candidates in this slot. We also
6073 need to note that we have allocated stack space, so that
6074 we won't later try to split a non-cprc candidate between
6075 core registers and the stack. */
6076 pcum
->aapcs_cprc_failed
[slot
] = true;
6077 pcum
->can_split
= false;
6080 /* We didn't get a register, so this argument goes on the
6082 gcc_assert (pcum
->can_split
== false);
6087 /* C3 - For double-word aligned arguments, round the NCRN up to the
6088 next even number. */
6089 ncrn
= pcum
->aapcs_ncrn
;
6090 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
6093 nregs
= ARM_NUM_REGS2(mode
, type
);
6095 /* Sigh, this test should really assert that nregs > 0, but a GCC
6096 extension allows empty structs and then gives them empty size; it
6097 then allows such a structure to be passed by value. For some of
6098 the code below we have to pretend that such an argument has
6099 non-zero size so that we 'locate' it correctly either in
6100 registers or on the stack. */
6101 gcc_assert (nregs
>= 0);
6103 nregs2
= nregs
? nregs
: 1;
6105 /* C4 - Argument fits entirely in core registers. */
6106 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6108 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6109 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6113 /* C5 - Some core registers left and there are no arguments already
6114 on the stack: split this argument between the remaining core
6115 registers and the stack. */
6116 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6118 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6119 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6120 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6124 /* C6 - NCRN is set to 4. */
6125 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6127 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6131 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6132 for a call to a function whose data type is FNTYPE.
6133 For a library call, FNTYPE is NULL. */
6135 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6137 tree fndecl ATTRIBUTE_UNUSED
)
6139 /* Long call handling. */
6141 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6143 pcum
->pcs_variant
= arm_pcs_default
;
6145 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6147 if (arm_libcall_uses_aapcs_base (libname
))
6148 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6150 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6151 pcum
->aapcs_reg
= NULL_RTX
;
6152 pcum
->aapcs_partial
= 0;
6153 pcum
->aapcs_arg_processed
= false;
6154 pcum
->aapcs_cprc_slot
= -1;
6155 pcum
->can_split
= true;
6157 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6161 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6163 pcum
->aapcs_cprc_failed
[i
] = false;
6164 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6172 /* On the ARM, the offset starts at 0. */
6174 pcum
->iwmmxt_nregs
= 0;
6175 pcum
->can_split
= true;
6177 /* Varargs vectors are treated the same as long long.
6178 named_count avoids having to change the way arm handles 'named' */
6179 pcum
->named_count
= 0;
6182 if (TARGET_REALLY_IWMMXT
&& fntype
)
6186 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6188 fn_arg
= TREE_CHAIN (fn_arg
))
6189 pcum
->named_count
+= 1;
6191 if (! pcum
->named_count
)
6192 pcum
->named_count
= INT_MAX
;
6196 /* Return true if mode/type need doubleword alignment. */
6198 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6201 return PARM_BOUNDARY
< GET_MODE_ALIGNMENT (mode
);
6203 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6204 if (!AGGREGATE_TYPE_P (type
))
6205 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6207 /* Array types: Use member alignment of element type. */
6208 if (TREE_CODE (type
) == ARRAY_TYPE
)
6209 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6211 /* Record/aggregate types: Use greatest member alignment of any member. */
6212 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6213 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6220 /* Determine where to put an argument to a function.
6221 Value is zero to push the argument on the stack,
6222 or a hard register in which to store the argument.
6224 MODE is the argument's machine mode.
6225 TYPE is the data type of the argument (as a tree).
6226 This is null for libcalls where that information may
6228 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6229 the preceding args and about the function being called.
6230 NAMED is nonzero if this argument is a named parameter
6231 (otherwise it is an extra parameter matching an ellipsis).
6233 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6234 other arguments are passed on the stack. If (NAMED == 0) (which happens
6235 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6236 defined), say it is passed in the stack (function_prologue will
6237 indeed make it pass in the stack if necessary). */
6240 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6241 const_tree type
, bool named
)
6243 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6246 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6247 a call insn (op3 of a call_value insn). */
6248 if (mode
== VOIDmode
)
6251 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6253 aapcs_layout_arg (pcum
, mode
, type
, named
);
6254 return pcum
->aapcs_reg
;
6257 /* Varargs vectors are treated the same as long long.
6258 named_count avoids having to change the way arm handles 'named' */
6259 if (TARGET_IWMMXT_ABI
6260 && arm_vector_mode_supported_p (mode
)
6261 && pcum
->named_count
> pcum
->nargs
+ 1)
6263 if (pcum
->iwmmxt_nregs
<= 9)
6264 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6267 pcum
->can_split
= false;
6272 /* Put doubleword aligned quantities in even register pairs. */
6274 && ARM_DOUBLEWORD_ALIGN
6275 && arm_needs_doubleword_align (mode
, type
))
6278 /* Only allow splitting an arg between regs and memory if all preceding
6279 args were allocated to regs. For args passed by reference we only count
6280 the reference pointer. */
6281 if (pcum
->can_split
)
6284 nregs
= ARM_NUM_REGS2 (mode
, type
);
6286 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6289 return gen_rtx_REG (mode
, pcum
->nregs
);
6293 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6295 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
6296 ? DOUBLEWORD_ALIGNMENT
6301 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6302 tree type
, bool named
)
6304 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6305 int nregs
= pcum
->nregs
;
6307 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6309 aapcs_layout_arg (pcum
, mode
, type
, named
);
6310 return pcum
->aapcs_partial
;
6313 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6316 if (NUM_ARG_REGS
> nregs
6317 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6319 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6324 /* Update the data in PCUM to advance over an argument
6325 of mode MODE and data type TYPE.
6326 (TYPE is null for libcalls where that information may not be available.) */
6329 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6330 const_tree type
, bool named
)
6332 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6334 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6336 aapcs_layout_arg (pcum
, mode
, type
, named
);
6338 if (pcum
->aapcs_cprc_slot
>= 0)
6340 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6342 pcum
->aapcs_cprc_slot
= -1;
6345 /* Generic stuff. */
6346 pcum
->aapcs_arg_processed
= false;
6347 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6348 pcum
->aapcs_reg
= NULL_RTX
;
6349 pcum
->aapcs_partial
= 0;
6354 if (arm_vector_mode_supported_p (mode
)
6355 && pcum
->named_count
> pcum
->nargs
6356 && TARGET_IWMMXT_ABI
)
6357 pcum
->iwmmxt_nregs
+= 1;
6359 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6363 /* Variable sized types are passed by reference. This is a GCC
6364 extension to the ARM ABI. */
6367 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6368 machine_mode mode ATTRIBUTE_UNUSED
,
6369 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6371 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6374 /* Encode the current state of the #pragma [no_]long_calls. */
6377 OFF
, /* No #pragma [no_]long_calls is in effect. */
6378 LONG
, /* #pragma long_calls is in effect. */
6379 SHORT
/* #pragma no_long_calls is in effect. */
6382 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6385 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6387 arm_pragma_long_calls
= LONG
;
6391 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6393 arm_pragma_long_calls
= SHORT
;
6397 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6399 arm_pragma_long_calls
= OFF
;
6402 /* Handle an attribute requiring a FUNCTION_DECL;
6403 arguments as in struct attribute_spec.handler. */
6405 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6406 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6408 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6410 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6412 *no_add_attrs
= true;
6418 /* Handle an "interrupt" or "isr" attribute;
6419 arguments as in struct attribute_spec.handler. */
6421 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6426 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6428 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6430 *no_add_attrs
= true;
6432 /* FIXME: the argument if any is checked for type attributes;
6433 should it be checked for decl ones? */
6437 if (TREE_CODE (*node
) == FUNCTION_TYPE
6438 || TREE_CODE (*node
) == METHOD_TYPE
)
6440 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6442 warning (OPT_Wattributes
, "%qE attribute ignored",
6444 *no_add_attrs
= true;
6447 else if (TREE_CODE (*node
) == POINTER_TYPE
6448 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6449 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6450 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6452 *node
= build_variant_type_copy (*node
);
6453 TREE_TYPE (*node
) = build_type_attribute_variant
6455 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6456 *no_add_attrs
= true;
6460 /* Possibly pass this attribute on from the type to a decl. */
6461 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6462 | (int) ATTR_FLAG_FUNCTION_NEXT
6463 | (int) ATTR_FLAG_ARRAY_NEXT
))
6465 *no_add_attrs
= true;
6466 return tree_cons (name
, args
, NULL_TREE
);
6470 warning (OPT_Wattributes
, "%qE attribute ignored",
6479 /* Handle a "pcs" attribute; arguments as in struct
6480 attribute_spec.handler. */
6482 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6483 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6485 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6487 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6488 *no_add_attrs
= true;
6493 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6494 /* Handle the "notshared" attribute. This attribute is another way of
6495 requesting hidden visibility. ARM's compiler supports
6496 "__declspec(notshared)"; we support the same thing via an
6500 arm_handle_notshared_attribute (tree
*node
,
6501 tree name ATTRIBUTE_UNUSED
,
6502 tree args ATTRIBUTE_UNUSED
,
6503 int flags ATTRIBUTE_UNUSED
,
6506 tree decl
= TYPE_NAME (*node
);
6510 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6511 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6512 *no_add_attrs
= false;
6518 /* Return 0 if the attributes for two types are incompatible, 1 if they
6519 are compatible, and 2 if they are nearly compatible (which causes a
6520 warning to be generated). */
6522 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6526 /* Check for mismatch of non-default calling convention. */
6527 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6530 /* Check for mismatched call attributes. */
6531 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6532 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6533 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6534 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6536 /* Only bother to check if an attribute is defined. */
6537 if (l1
| l2
| s1
| s2
)
6539 /* If one type has an attribute, the other must have the same attribute. */
6540 if ((l1
!= l2
) || (s1
!= s2
))
6543 /* Disallow mixed attributes. */
6544 if ((l1
& s2
) || (l2
& s1
))
6548 /* Check for mismatched ISR attribute. */
6549 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6551 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6552 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6554 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6561 /* Assigns default attributes to newly defined type. This is used to
6562 set short_call/long_call attributes for function types of
6563 functions defined inside corresponding #pragma scopes. */
6565 arm_set_default_type_attributes (tree type
)
6567 /* Add __attribute__ ((long_call)) to all functions, when
6568 inside #pragma long_calls or __attribute__ ((short_call)),
6569 when inside #pragma no_long_calls. */
6570 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6572 tree type_attr_list
, attr_name
;
6573 type_attr_list
= TYPE_ATTRIBUTES (type
);
6575 if (arm_pragma_long_calls
== LONG
)
6576 attr_name
= get_identifier ("long_call");
6577 else if (arm_pragma_long_calls
== SHORT
)
6578 attr_name
= get_identifier ("short_call");
6582 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6583 TYPE_ATTRIBUTES (type
) = type_attr_list
;
6587 /* Return true if DECL is known to be linked into section SECTION. */
6590 arm_function_in_section_p (tree decl
, section
*section
)
6592 /* We can only be certain about the prevailing symbol definition. */
6593 if (!decl_binds_to_current_def_p (decl
))
6596 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6597 if (!DECL_SECTION_NAME (decl
))
6599 /* Make sure that we will not create a unique section for DECL. */
6600 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
6604 return function_section (decl
) == section
;
6607 /* Return nonzero if a 32-bit "long_call" should be generated for
6608 a call from the current function to DECL. We generate a long_call
6611 a. has an __attribute__((long call))
6612 or b. is within the scope of a #pragma long_calls
6613 or c. the -mlong-calls command line switch has been specified
6615 However we do not generate a long call if the function:
6617 d. has an __attribute__ ((short_call))
6618 or e. is inside the scope of a #pragma no_long_calls
6619 or f. is defined in the same section as the current function. */
6622 arm_is_long_call_p (tree decl
)
6627 return TARGET_LONG_CALLS
;
6629 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6630 if (lookup_attribute ("short_call", attrs
))
6633 /* For "f", be conservative, and only cater for cases in which the
6634 whole of the current function is placed in the same section. */
6635 if (!flag_reorder_blocks_and_partition
6636 && TREE_CODE (decl
) == FUNCTION_DECL
6637 && arm_function_in_section_p (decl
, current_function_section ()))
6640 if (lookup_attribute ("long_call", attrs
))
6643 return TARGET_LONG_CALLS
;
6646 /* Return nonzero if it is ok to make a tail-call to DECL. */
6648 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6650 unsigned long func_type
;
6652 if (cfun
->machine
->sibcall_blocked
)
6655 /* Never tailcall something if we are generating code for Thumb-1. */
6659 /* The PIC register is live on entry to VxWorks PLT entries, so we
6660 must make the call before restoring the PIC register. */
6661 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
6664 /* If we are interworking and the function is not declared static
6665 then we can't tail-call it unless we know that it exists in this
6666 compilation unit (since it might be a Thumb routine). */
6667 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6668 && !TREE_ASM_WRITTEN (decl
))
6671 func_type
= arm_current_func_type ();
6672 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6673 if (IS_INTERRUPT (func_type
))
6676 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6678 /* Check that the return value locations are the same. For
6679 example that we aren't returning a value from the sibling in
6680 a VFP register but then need to transfer it to a core
6684 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
6685 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6687 if (!rtx_equal_p (a
, b
))
6691 /* Never tailcall if function may be called with a misaligned SP. */
6692 if (IS_STACKALIGN (func_type
))
6695 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6696 references should become a NOP. Don't convert such calls into
6698 if (TARGET_AAPCS_BASED
6699 && arm_abi
== ARM_ABI_AAPCS
6701 && DECL_WEAK (decl
))
6704 /* Everything else is ok. */
6709 /* Addressing mode support functions. */
6711 /* Return nonzero if X is a legitimate immediate operand when compiling
6712 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6714 legitimate_pic_operand_p (rtx x
)
6716 if (GET_CODE (x
) == SYMBOL_REF
6717 || (GET_CODE (x
) == CONST
6718 && GET_CODE (XEXP (x
, 0)) == PLUS
6719 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6725 /* Record that the current function needs a PIC register. Initialize
6726 cfun->machine->pic_reg if we have not already done so. */
6729 require_pic_register (void)
6731 /* A lot of the logic here is made obscure by the fact that this
6732 routine gets called as part of the rtx cost estimation process.
6733 We don't want those calls to affect any assumptions about the real
6734 function; and further, we can't call entry_of_function() until we
6735 start the real expansion process. */
6736 if (!crtl
->uses_pic_offset_table
)
6738 gcc_assert (can_create_pseudo_p ());
6739 if (arm_pic_register
!= INVALID_REGNUM
6740 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6742 if (!cfun
->machine
->pic_reg
)
6743 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6745 /* Play games to avoid marking the function as needing pic
6746 if we are being called as part of the cost-estimation
6748 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6749 crtl
->uses_pic_offset_table
= 1;
6753 rtx_insn
*seq
, *insn
;
6755 if (!cfun
->machine
->pic_reg
)
6756 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6758 /* Play games to avoid marking the function as needing pic
6759 if we are being called as part of the cost-estimation
6761 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6763 crtl
->uses_pic_offset_table
= 1;
6766 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6767 && arm_pic_register
> LAST_LO_REGNUM
)
6768 emit_move_insn (cfun
->machine
->pic_reg
,
6769 gen_rtx_REG (Pmode
, arm_pic_register
));
6771 arm_load_pic_register (0UL);
6776 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6778 INSN_LOCATION (insn
) = prologue_location
;
6780 /* We can be called during expansion of PHI nodes, where
6781 we can't yet emit instructions directly in the final
6782 insn stream. Queue the insns on the entry edge, they will
6783 be committed after everything else is expanded. */
6784 insert_insn_on_edge (seq
,
6785 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6792 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
6794 if (GET_CODE (orig
) == SYMBOL_REF
6795 || GET_CODE (orig
) == LABEL_REF
)
6801 gcc_assert (can_create_pseudo_p ());
6802 reg
= gen_reg_rtx (Pmode
);
6805 /* VxWorks does not impose a fixed gap between segments; the run-time
6806 gap can be different from the object-file gap. We therefore can't
6807 use GOTOFF unless we are absolutely sure that the symbol is in the
6808 same segment as the GOT. Unfortunately, the flexibility of linker
6809 scripts means that we can't be sure of that in general, so assume
6810 that GOTOFF is never valid on VxWorks. */
6811 if ((GET_CODE (orig
) == LABEL_REF
6812 || (GET_CODE (orig
) == SYMBOL_REF
&&
6813 SYMBOL_REF_LOCAL_P (orig
)))
6815 && arm_pic_data_is_text_relative
)
6816 insn
= arm_pic_static_addr (orig
, reg
);
6822 /* If this function doesn't have a pic register, create one now. */
6823 require_pic_register ();
6825 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6827 /* Make the MEM as close to a constant as possible. */
6828 mem
= SET_SRC (pat
);
6829 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6830 MEM_READONLY_P (mem
) = 1;
6831 MEM_NOTRAP_P (mem
) = 1;
6833 insn
= emit_insn (pat
);
6836 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6838 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6842 else if (GET_CODE (orig
) == CONST
)
6846 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6847 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6850 /* Handle the case where we have: const (UNSPEC_TLS). */
6851 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6852 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6855 /* Handle the case where we have:
6856 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6858 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6859 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6860 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6862 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6868 gcc_assert (can_create_pseudo_p ());
6869 reg
= gen_reg_rtx (Pmode
);
6872 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6874 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6875 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6876 base
== reg
? 0 : reg
);
6878 if (CONST_INT_P (offset
))
6880 /* The base register doesn't really matter, we only want to
6881 test the index for the appropriate mode. */
6882 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6884 gcc_assert (can_create_pseudo_p ());
6885 offset
= force_reg (Pmode
, offset
);
6888 if (CONST_INT_P (offset
))
6889 return plus_constant (Pmode
, base
, INTVAL (offset
));
6892 if (GET_MODE_SIZE (mode
) > 4
6893 && (GET_MODE_CLASS (mode
) == MODE_INT
6894 || TARGET_SOFT_FLOAT
))
6896 emit_insn (gen_addsi3 (reg
, base
, offset
));
6900 return gen_rtx_PLUS (Pmode
, base
, offset
);
6907 /* Find a spare register to use during the prolog of a function. */
6910 thumb_find_work_register (unsigned long pushed_regs_mask
)
6914 /* Check the argument registers first as these are call-used. The
6915 register allocation order means that sometimes r3 might be used
6916 but earlier argument registers might not, so check them all. */
6917 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
6918 if (!df_regs_ever_live_p (reg
))
6921 /* Before going on to check the call-saved registers we can try a couple
6922 more ways of deducing that r3 is available. The first is when we are
6923 pushing anonymous arguments onto the stack and we have less than 4
6924 registers worth of fixed arguments(*). In this case r3 will be part of
6925 the variable argument list and so we can be sure that it will be
6926 pushed right at the start of the function. Hence it will be available
6927 for the rest of the prologue.
6928 (*): ie crtl->args.pretend_args_size is greater than 0. */
6929 if (cfun
->machine
->uses_anonymous_args
6930 && crtl
->args
.pretend_args_size
> 0)
6931 return LAST_ARG_REGNUM
;
6933 /* The other case is when we have fixed arguments but less than 4 registers
6934 worth. In this case r3 might be used in the body of the function, but
6935 it is not being used to convey an argument into the function. In theory
6936 we could just check crtl->args.size to see how many bytes are
6937 being passed in argument registers, but it seems that it is unreliable.
6938 Sometimes it will have the value 0 when in fact arguments are being
6939 passed. (See testcase execute/20021111-1.c for an example). So we also
6940 check the args_info.nregs field as well. The problem with this field is
6941 that it makes no allowances for arguments that are passed to the
6942 function but which are not used. Hence we could miss an opportunity
6943 when a function has an unused argument in r3. But it is better to be
6944 safe than to be sorry. */
6945 if (! cfun
->machine
->uses_anonymous_args
6946 && crtl
->args
.size
>= 0
6947 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
6948 && (TARGET_AAPCS_BASED
6949 ? crtl
->args
.info
.aapcs_ncrn
< 4
6950 : crtl
->args
.info
.nregs
< 4))
6951 return LAST_ARG_REGNUM
;
6953 /* Otherwise look for a call-saved register that is going to be pushed. */
6954 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
6955 if (pushed_regs_mask
& (1 << reg
))
6960 /* Thumb-2 can use high regs. */
6961 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
6962 if (pushed_regs_mask
& (1 << reg
))
6965 /* Something went wrong - thumb_compute_save_reg_mask()
6966 should have arranged for a suitable register to be pushed. */
6970 static GTY(()) int pic_labelno
;
6972 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6976 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
6978 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
6980 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
6983 gcc_assert (flag_pic
);
6985 pic_reg
= cfun
->machine
->pic_reg
;
6986 if (TARGET_VXWORKS_RTP
)
6988 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
6989 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
6990 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
6992 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
6994 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
6995 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
6999 /* We use an UNSPEC rather than a LABEL_REF because this label
7000 never appears in the code stream. */
7002 labelno
= GEN_INT (pic_labelno
++);
7003 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7004 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7006 /* On the ARM the PC register contains 'dot + 8' at the time of the
7007 addition, on the Thumb it is 'dot + 4'. */
7008 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7009 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7011 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7015 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7017 else /* TARGET_THUMB1 */
7019 if (arm_pic_register
!= INVALID_REGNUM
7020 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7022 /* We will have pushed the pic register, so we should always be
7023 able to find a work register. */
7024 pic_tmp
= gen_rtx_REG (SImode
,
7025 thumb_find_work_register (saved_regs
));
7026 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7027 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7028 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7030 else if (arm_pic_register
!= INVALID_REGNUM
7031 && arm_pic_register
> LAST_LO_REGNUM
7032 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7034 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7035 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7036 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7039 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7043 /* Need to emit this whether or not we obey regdecls,
7044 since setjmp/longjmp can cause life info to screw up. */
7048 /* Generate code to load the address of a static var when flag_pic is set. */
7050 arm_pic_static_addr (rtx orig
, rtx reg
)
7052 rtx l1
, labelno
, offset_rtx
, insn
;
7054 gcc_assert (flag_pic
);
7056 /* We use an UNSPEC rather than a LABEL_REF because this label
7057 never appears in the code stream. */
7058 labelno
= GEN_INT (pic_labelno
++);
7059 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7060 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7062 /* On the ARM the PC register contains 'dot + 8' at the time of the
7063 addition, on the Thumb it is 'dot + 4'. */
7064 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7065 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7066 UNSPEC_SYMBOL_OFFSET
);
7067 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7069 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7073 /* Return nonzero if X is valid as an ARM state addressing register. */
7075 arm_address_register_rtx_p (rtx x
, int strict_p
)
7085 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7087 return (regno
<= LAST_ARM_REGNUM
7088 || regno
>= FIRST_PSEUDO_REGISTER
7089 || regno
== FRAME_POINTER_REGNUM
7090 || regno
== ARG_POINTER_REGNUM
);
7093 /* Return TRUE if this rtx is the difference of a symbol and a label,
7094 and will reduce to a PC-relative relocation in the object file.
7095 Expressions like this can be left alone when generating PIC, rather
7096 than forced through the GOT. */
7098 pcrel_constant_p (rtx x
)
7100 if (GET_CODE (x
) == MINUS
)
7101 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7106 /* Return true if X will surely end up in an index register after next
7109 will_be_in_index_register (const_rtx x
)
7111 /* arm.md: calculate_pic_address will split this into a register. */
7112 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7115 /* Return nonzero if X is a valid ARM state address operand. */
7117 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7121 enum rtx_code code
= GET_CODE (x
);
7123 if (arm_address_register_rtx_p (x
, strict_p
))
7126 use_ldrd
= (TARGET_LDRD
7128 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
7130 if (code
== POST_INC
|| code
== PRE_DEC
7131 || ((code
== PRE_INC
|| code
== POST_DEC
)
7132 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7133 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7135 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7136 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7137 && GET_CODE (XEXP (x
, 1)) == PLUS
7138 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7140 rtx addend
= XEXP (XEXP (x
, 1), 1);
7142 /* Don't allow ldrd post increment by register because it's hard
7143 to fixup invalid register choices. */
7145 && GET_CODE (x
) == POST_MODIFY
7149 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7150 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7153 /* After reload constants split into minipools will have addresses
7154 from a LABEL_REF. */
7155 else if (reload_completed
7156 && (code
== LABEL_REF
7158 && GET_CODE (XEXP (x
, 0)) == PLUS
7159 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7160 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7163 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7166 else if (code
== PLUS
)
7168 rtx xop0
= XEXP (x
, 0);
7169 rtx xop1
= XEXP (x
, 1);
7171 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7172 && ((CONST_INT_P (xop1
)
7173 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7174 || (!strict_p
&& will_be_in_index_register (xop1
))))
7175 || (arm_address_register_rtx_p (xop1
, strict_p
)
7176 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7180 /* Reload currently can't handle MINUS, so disable this for now */
7181 else if (GET_CODE (x
) == MINUS
)
7183 rtx xop0
= XEXP (x
, 0);
7184 rtx xop1
= XEXP (x
, 1);
7186 return (arm_address_register_rtx_p (xop0
, strict_p
)
7187 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7191 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7192 && code
== SYMBOL_REF
7193 && CONSTANT_POOL_ADDRESS_P (x
)
7195 && symbol_mentioned_p (get_pool_constant (x
))
7196 && ! pcrel_constant_p (get_pool_constant (x
))))
7202 /* Return nonzero if X is a valid Thumb-2 address operand. */
7204 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7207 enum rtx_code code
= GET_CODE (x
);
7209 if (arm_address_register_rtx_p (x
, strict_p
))
7212 use_ldrd
= (TARGET_LDRD
7214 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
7216 if (code
== POST_INC
|| code
== PRE_DEC
7217 || ((code
== PRE_INC
|| code
== POST_DEC
)
7218 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7219 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7221 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7222 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7223 && GET_CODE (XEXP (x
, 1)) == PLUS
7224 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7226 /* Thumb-2 only has autoincrement by constant. */
7227 rtx addend
= XEXP (XEXP (x
, 1), 1);
7228 HOST_WIDE_INT offset
;
7230 if (!CONST_INT_P (addend
))
7233 offset
= INTVAL(addend
);
7234 if (GET_MODE_SIZE (mode
) <= 4)
7235 return (offset
> -256 && offset
< 256);
7237 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7238 && (offset
& 3) == 0);
7241 /* After reload constants split into minipools will have addresses
7242 from a LABEL_REF. */
7243 else if (reload_completed
7244 && (code
== LABEL_REF
7246 && GET_CODE (XEXP (x
, 0)) == PLUS
7247 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7248 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7251 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7254 else if (code
== PLUS
)
7256 rtx xop0
= XEXP (x
, 0);
7257 rtx xop1
= XEXP (x
, 1);
7259 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7260 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7261 || (!strict_p
&& will_be_in_index_register (xop1
))))
7262 || (arm_address_register_rtx_p (xop1
, strict_p
)
7263 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7266 /* Normally we can assign constant values to target registers without
7267 the help of constant pool. But there are cases we have to use constant
7269 1) assign a label to register.
7270 2) sign-extend a 8bit value to 32bit and then assign to register.
7272 Constant pool access in format:
7273 (set (reg r0) (mem (symbol_ref (".LC0"))))
7274 will cause the use of literal pool (later in function arm_reorg).
7275 So here we mark such format as an invalid format, then the compiler
7276 will adjust it into:
7277 (set (reg r0) (symbol_ref (".LC0")))
7278 (set (reg r0) (mem (reg r0))).
7279 No extra register is required, and (mem (reg r0)) won't cause the use
7280 of literal pools. */
7281 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
7282 && CONSTANT_POOL_ADDRESS_P (x
))
7285 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7286 && code
== SYMBOL_REF
7287 && CONSTANT_POOL_ADDRESS_P (x
)
7289 && symbol_mentioned_p (get_pool_constant (x
))
7290 && ! pcrel_constant_p (get_pool_constant (x
))))
7296 /* Return nonzero if INDEX is valid for an address index operand in
7299 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7302 HOST_WIDE_INT range
;
7303 enum rtx_code code
= GET_CODE (index
);
7305 /* Standard coprocessor addressing modes. */
7306 if (TARGET_HARD_FLOAT
7308 && (mode
== SFmode
|| mode
== DFmode
))
7309 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7310 && INTVAL (index
) > -1024
7311 && (INTVAL (index
) & 3) == 0);
7313 /* For quad modes, we restrict the constant offset to be slightly less
7314 than what the instruction format permits. We do this because for
7315 quad mode moves, we will actually decompose them into two separate
7316 double-mode reads or writes. INDEX must therefore be a valid
7317 (double-mode) offset and so should INDEX+8. */
7318 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7319 return (code
== CONST_INT
7320 && INTVAL (index
) < 1016
7321 && INTVAL (index
) > -1024
7322 && (INTVAL (index
) & 3) == 0);
7324 /* We have no such constraint on double mode offsets, so we permit the
7325 full range of the instruction format. */
7326 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7327 return (code
== CONST_INT
7328 && INTVAL (index
) < 1024
7329 && INTVAL (index
) > -1024
7330 && (INTVAL (index
) & 3) == 0);
7332 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7333 return (code
== CONST_INT
7334 && INTVAL (index
) < 1024
7335 && INTVAL (index
) > -1024
7336 && (INTVAL (index
) & 3) == 0);
7338 if (arm_address_register_rtx_p (index
, strict_p
)
7339 && (GET_MODE_SIZE (mode
) <= 4))
7342 if (mode
== DImode
|| mode
== DFmode
)
7344 if (code
== CONST_INT
)
7346 HOST_WIDE_INT val
= INTVAL (index
);
7349 return val
> -256 && val
< 256;
7351 return val
> -4096 && val
< 4092;
7354 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7357 if (GET_MODE_SIZE (mode
) <= 4
7361 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7365 rtx xiop0
= XEXP (index
, 0);
7366 rtx xiop1
= XEXP (index
, 1);
7368 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7369 && power_of_two_operand (xiop1
, SImode
))
7370 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7371 && power_of_two_operand (xiop0
, SImode
)));
7373 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7374 || code
== ASHIFT
|| code
== ROTATERT
)
7376 rtx op
= XEXP (index
, 1);
7378 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7381 && INTVAL (op
) <= 31);
7385 /* For ARM v4 we may be doing a sign-extend operation during the
7391 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7397 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7399 return (code
== CONST_INT
7400 && INTVAL (index
) < range
7401 && INTVAL (index
) > -range
);
7404 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7405 index operand. i.e. 1, 2, 4 or 8. */
7407 thumb2_index_mul_operand (rtx op
)
7411 if (!CONST_INT_P (op
))
7415 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7418 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7420 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7422 enum rtx_code code
= GET_CODE (index
);
7424 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7425 /* Standard coprocessor addressing modes. */
7426 if (TARGET_HARD_FLOAT
7428 && (mode
== SFmode
|| mode
== DFmode
))
7429 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7430 /* Thumb-2 allows only > -256 index range for it's core register
7431 load/stores. Since we allow SF/DF in core registers, we have
7432 to use the intersection between -256~4096 (core) and -1024~1024
7434 && INTVAL (index
) > -256
7435 && (INTVAL (index
) & 3) == 0);
7437 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7439 /* For DImode assume values will usually live in core regs
7440 and only allow LDRD addressing modes. */
7441 if (!TARGET_LDRD
|| mode
!= DImode
)
7442 return (code
== CONST_INT
7443 && INTVAL (index
) < 1024
7444 && INTVAL (index
) > -1024
7445 && (INTVAL (index
) & 3) == 0);
7448 /* For quad modes, we restrict the constant offset to be slightly less
7449 than what the instruction format permits. We do this because for
7450 quad mode moves, we will actually decompose them into two separate
7451 double-mode reads or writes. INDEX must therefore be a valid
7452 (double-mode) offset and so should INDEX+8. */
7453 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7454 return (code
== CONST_INT
7455 && INTVAL (index
) < 1016
7456 && INTVAL (index
) > -1024
7457 && (INTVAL (index
) & 3) == 0);
7459 /* We have no such constraint on double mode offsets, so we permit the
7460 full range of the instruction format. */
7461 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7462 return (code
== CONST_INT
7463 && INTVAL (index
) < 1024
7464 && INTVAL (index
) > -1024
7465 && (INTVAL (index
) & 3) == 0);
7467 if (arm_address_register_rtx_p (index
, strict_p
)
7468 && (GET_MODE_SIZE (mode
) <= 4))
7471 if (mode
== DImode
|| mode
== DFmode
)
7473 if (code
== CONST_INT
)
7475 HOST_WIDE_INT val
= INTVAL (index
);
7476 /* ??? Can we assume ldrd for thumb2? */
7477 /* Thumb-2 ldrd only has reg+const addressing modes. */
7478 /* ldrd supports offsets of +-1020.
7479 However the ldr fallback does not. */
7480 return val
> -256 && val
< 256 && (val
& 3) == 0;
7488 rtx xiop0
= XEXP (index
, 0);
7489 rtx xiop1
= XEXP (index
, 1);
7491 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7492 && thumb2_index_mul_operand (xiop1
))
7493 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7494 && thumb2_index_mul_operand (xiop0
)));
7496 else if (code
== ASHIFT
)
7498 rtx op
= XEXP (index
, 1);
7500 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7503 && INTVAL (op
) <= 3);
7506 return (code
== CONST_INT
7507 && INTVAL (index
) < 4096
7508 && INTVAL (index
) > -256);
7511 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7513 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
7523 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7525 return (regno
<= LAST_LO_REGNUM
7526 || regno
> LAST_VIRTUAL_REGISTER
7527 || regno
== FRAME_POINTER_REGNUM
7528 || (GET_MODE_SIZE (mode
) >= 4
7529 && (regno
== STACK_POINTER_REGNUM
7530 || regno
>= FIRST_PSEUDO_REGISTER
7531 || x
== hard_frame_pointer_rtx
7532 || x
== arg_pointer_rtx
)));
7535 /* Return nonzero if x is a legitimate index register. This is the case
7536 for any base register that can access a QImode object. */
7538 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
7540 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
7543 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7545 The AP may be eliminated to either the SP or the FP, so we use the
7546 least common denominator, e.g. SImode, and offsets from 0 to 64.
7548 ??? Verify whether the above is the right approach.
7550 ??? Also, the FP may be eliminated to the SP, so perhaps that
7551 needs special handling also.
7553 ??? Look at how the mips16 port solves this problem. It probably uses
7554 better ways to solve some of these problems.
7556 Although it is not incorrect, we don't accept QImode and HImode
7557 addresses based on the frame pointer or arg pointer until the
7558 reload pass starts. This is so that eliminating such addresses
7559 into stack based ones won't produce impossible code. */
7561 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7563 /* ??? Not clear if this is right. Experiment. */
7564 if (GET_MODE_SIZE (mode
) < 4
7565 && !(reload_in_progress
|| reload_completed
)
7566 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7567 || reg_mentioned_p (arg_pointer_rtx
, x
)
7568 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7569 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7570 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7571 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7574 /* Accept any base register. SP only in SImode or larger. */
7575 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7578 /* This is PC relative data before arm_reorg runs. */
7579 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7580 && GET_CODE (x
) == SYMBOL_REF
7581 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7584 /* This is PC relative data after arm_reorg runs. */
7585 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7587 && (GET_CODE (x
) == LABEL_REF
7588 || (GET_CODE (x
) == CONST
7589 && GET_CODE (XEXP (x
, 0)) == PLUS
7590 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7591 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7594 /* Post-inc indexing only supported for SImode and larger. */
7595 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7596 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7599 else if (GET_CODE (x
) == PLUS
)
7601 /* REG+REG address can be any two index registers. */
7602 /* We disallow FRAME+REG addressing since we know that FRAME
7603 will be replaced with STACK, and SP relative addressing only
7604 permits SP+OFFSET. */
7605 if (GET_MODE_SIZE (mode
) <= 4
7606 && XEXP (x
, 0) != frame_pointer_rtx
7607 && XEXP (x
, 1) != frame_pointer_rtx
7608 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7609 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7610 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7613 /* REG+const has 5-7 bit offset for non-SP registers. */
7614 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7615 || XEXP (x
, 0) == arg_pointer_rtx
)
7616 && CONST_INT_P (XEXP (x
, 1))
7617 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7620 /* REG+const has 10-bit offset for SP, but only SImode and
7621 larger is supported. */
7622 /* ??? Should probably check for DI/DFmode overflow here
7623 just like GO_IF_LEGITIMATE_OFFSET does. */
7624 else if (REG_P (XEXP (x
, 0))
7625 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7626 && GET_MODE_SIZE (mode
) >= 4
7627 && CONST_INT_P (XEXP (x
, 1))
7628 && INTVAL (XEXP (x
, 1)) >= 0
7629 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7630 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7633 else if (REG_P (XEXP (x
, 0))
7634 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7635 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7636 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7637 && REGNO (XEXP (x
, 0))
7638 <= LAST_VIRTUAL_POINTER_REGISTER
))
7639 && GET_MODE_SIZE (mode
) >= 4
7640 && CONST_INT_P (XEXP (x
, 1))
7641 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7645 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7646 && GET_MODE_SIZE (mode
) == 4
7647 && GET_CODE (x
) == SYMBOL_REF
7648 && CONSTANT_POOL_ADDRESS_P (x
)
7650 && symbol_mentioned_p (get_pool_constant (x
))
7651 && ! pcrel_constant_p (get_pool_constant (x
))))
7657 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7658 instruction of mode MODE. */
7660 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
7662 switch (GET_MODE_SIZE (mode
))
7665 return val
>= 0 && val
< 32;
7668 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7672 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7678 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
7681 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7682 else if (TARGET_THUMB2
)
7683 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7684 else /* if (TARGET_THUMB1) */
7685 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7688 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7690 Given an rtx X being reloaded into a reg required to be
7691 in class CLASS, return the class of reg to actually use.
7692 In general this is just CLASS, but for the Thumb core registers and
7693 immediate constants we prefer a LO_REGS class or a subset. */
7696 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7702 if (rclass
== GENERAL_REGS
)
7709 /* Build the SYMBOL_REF for __tls_get_addr. */
7711 static GTY(()) rtx tls_get_addr_libfunc
;
7714 get_tls_get_addr (void)
7716 if (!tls_get_addr_libfunc
)
7717 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7718 return tls_get_addr_libfunc
;
7722 arm_load_tp (rtx target
)
7725 target
= gen_reg_rtx (SImode
);
7729 /* Can return in any reg. */
7730 emit_insn (gen_load_tp_hard (target
));
7734 /* Always returned in r0. Immediately copy the result into a pseudo,
7735 otherwise other uses of r0 (e.g. setting up function arguments) may
7736 clobber the value. */
7740 emit_insn (gen_load_tp_soft ());
7742 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
7743 emit_move_insn (target
, tmp
);
7749 load_tls_operand (rtx x
, rtx reg
)
7753 if (reg
== NULL_RTX
)
7754 reg
= gen_reg_rtx (SImode
);
7756 tmp
= gen_rtx_CONST (SImode
, x
);
7758 emit_move_insn (reg
, tmp
);
7764 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7766 rtx insns
, label
, labelno
, sum
;
7768 gcc_assert (reloc
!= TLS_DESCSEQ
);
7771 labelno
= GEN_INT (pic_labelno
++);
7772 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7773 label
= gen_rtx_CONST (VOIDmode
, label
);
7775 sum
= gen_rtx_UNSPEC (Pmode
,
7776 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7777 GEN_INT (TARGET_ARM
? 8 : 4)),
7779 reg
= load_tls_operand (sum
, reg
);
7782 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7784 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7786 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7787 LCT_PURE
, /* LCT_CONST? */
7788 Pmode
, 1, reg
, Pmode
);
7790 insns
= get_insns ();
7797 arm_tls_descseq_addr (rtx x
, rtx reg
)
7799 rtx labelno
= GEN_INT (pic_labelno
++);
7800 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7801 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7802 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7803 gen_rtx_CONST (VOIDmode
, label
),
7804 GEN_INT (!TARGET_ARM
)),
7806 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
7808 emit_insn (gen_tlscall (x
, labelno
));
7810 reg
= gen_reg_rtx (SImode
);
7812 gcc_assert (REGNO (reg
) != R0_REGNUM
);
7814 emit_move_insn (reg
, reg0
);
7820 legitimize_tls_address (rtx x
, rtx reg
)
7822 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
7823 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7827 case TLS_MODEL_GLOBAL_DYNAMIC
:
7828 if (TARGET_GNU2_TLS
)
7830 reg
= arm_tls_descseq_addr (x
, reg
);
7832 tp
= arm_load_tp (NULL_RTX
);
7834 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7838 /* Original scheme */
7839 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7840 dest
= gen_reg_rtx (Pmode
);
7841 emit_libcall_block (insns
, dest
, ret
, x
);
7845 case TLS_MODEL_LOCAL_DYNAMIC
:
7846 if (TARGET_GNU2_TLS
)
7848 reg
= arm_tls_descseq_addr (x
, reg
);
7850 tp
= arm_load_tp (NULL_RTX
);
7852 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7856 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7858 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7859 share the LDM result with other LD model accesses. */
7860 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7862 dest
= gen_reg_rtx (Pmode
);
7863 emit_libcall_block (insns
, dest
, ret
, eqv
);
7865 /* Load the addend. */
7866 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7867 GEN_INT (TLS_LDO32
)),
7869 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7870 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7874 case TLS_MODEL_INITIAL_EXEC
:
7875 labelno
= GEN_INT (pic_labelno
++);
7876 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7877 label
= gen_rtx_CONST (VOIDmode
, label
);
7878 sum
= gen_rtx_UNSPEC (Pmode
,
7879 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7880 GEN_INT (TARGET_ARM
? 8 : 4)),
7882 reg
= load_tls_operand (sum
, reg
);
7885 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7886 else if (TARGET_THUMB2
)
7887 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7890 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7891 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
7894 tp
= arm_load_tp (NULL_RTX
);
7896 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7898 case TLS_MODEL_LOCAL_EXEC
:
7899 tp
= arm_load_tp (NULL_RTX
);
7901 reg
= gen_rtx_UNSPEC (Pmode
,
7902 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
7904 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
7906 return gen_rtx_PLUS (Pmode
, tp
, reg
);
7913 /* Try machine-dependent ways of modifying an illegitimate address
7914 to be legitimate. If we find one, return the new, valid address. */
7916 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
7918 if (arm_tls_referenced_p (x
))
7922 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
7924 addend
= XEXP (XEXP (x
, 0), 1);
7925 x
= XEXP (XEXP (x
, 0), 0);
7928 if (GET_CODE (x
) != SYMBOL_REF
)
7931 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
7933 x
= legitimize_tls_address (x
, NULL_RTX
);
7937 x
= gen_rtx_PLUS (SImode
, x
, addend
);
7946 /* TODO: legitimize_address for Thumb2. */
7949 return thumb_legitimize_address (x
, orig_x
, mode
);
7952 if (GET_CODE (x
) == PLUS
)
7954 rtx xop0
= XEXP (x
, 0);
7955 rtx xop1
= XEXP (x
, 1);
7957 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
7958 xop0
= force_reg (SImode
, xop0
);
7960 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
7961 && !symbol_mentioned_p (xop1
))
7962 xop1
= force_reg (SImode
, xop1
);
7964 if (ARM_BASE_REGISTER_RTX_P (xop0
)
7965 && CONST_INT_P (xop1
))
7967 HOST_WIDE_INT n
, low_n
;
7971 /* VFP addressing modes actually allow greater offsets, but for
7972 now we just stick with the lowest common denominator. */
7974 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
7986 low_n
= ((mode
) == TImode
? 0
7987 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
7991 base_reg
= gen_reg_rtx (SImode
);
7992 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
7993 emit_move_insn (base_reg
, val
);
7994 x
= plus_constant (Pmode
, base_reg
, low_n
);
7996 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
7997 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8000 /* XXX We don't allow MINUS any more -- see comment in
8001 arm_legitimate_address_outer_p (). */
8002 else if (GET_CODE (x
) == MINUS
)
8004 rtx xop0
= XEXP (x
, 0);
8005 rtx xop1
= XEXP (x
, 1);
8007 if (CONSTANT_P (xop0
))
8008 xop0
= force_reg (SImode
, xop0
);
8010 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8011 xop1
= force_reg (SImode
, xop1
);
8013 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8014 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8017 /* Make sure to take full advantage of the pre-indexed addressing mode
8018 with absolute addresses which often allows for the base register to
8019 be factorized for multiple adjacent memory references, and it might
8020 even allows for the mini pool to be avoided entirely. */
8021 else if (CONST_INT_P (x
) && optimize
> 0)
8024 HOST_WIDE_INT mask
, base
, index
;
8027 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8028 use a 8-bit index. So let's use a 12-bit index for SImode only and
8029 hope that arm_gen_constant will enable ldrb to use more bits. */
8030 bits
= (mode
== SImode
) ? 12 : 8;
8031 mask
= (1 << bits
) - 1;
8032 base
= INTVAL (x
) & ~mask
;
8033 index
= INTVAL (x
) & mask
;
8034 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8036 /* It'll most probably be more efficient to generate the base
8037 with more bits set and use a negative index instead. */
8041 base_reg
= force_reg (SImode
, GEN_INT (base
));
8042 x
= plus_constant (Pmode
, base_reg
, index
);
8047 /* We need to find and carefully transform any SYMBOL and LABEL
8048 references; so go back to the original address expression. */
8049 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8051 if (new_x
!= orig_x
)
8059 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8060 to be legitimate. If we find one, return the new, valid address. */
8062 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8064 if (GET_CODE (x
) == PLUS
8065 && CONST_INT_P (XEXP (x
, 1))
8066 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8067 || INTVAL (XEXP (x
, 1)) < 0))
8069 rtx xop0
= XEXP (x
, 0);
8070 rtx xop1
= XEXP (x
, 1);
8071 HOST_WIDE_INT offset
= INTVAL (xop1
);
8073 /* Try and fold the offset into a biasing of the base register and
8074 then offsetting that. Don't do this when optimizing for space
8075 since it can cause too many CSEs. */
8076 if (optimize_size
&& offset
>= 0
8077 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8079 HOST_WIDE_INT delta
;
8082 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8083 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8084 delta
= 31 * GET_MODE_SIZE (mode
);
8086 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8088 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8090 x
= plus_constant (Pmode
, xop0
, delta
);
8092 else if (offset
< 0 && offset
> -256)
8093 /* Small negative offsets are best done with a subtract before the
8094 dereference, forcing these into a register normally takes two
8096 x
= force_operand (x
, NULL_RTX
);
8099 /* For the remaining cases, force the constant into a register. */
8100 xop1
= force_reg (SImode
, xop1
);
8101 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8104 else if (GET_CODE (x
) == PLUS
8105 && s_register_operand (XEXP (x
, 1), SImode
)
8106 && !s_register_operand (XEXP (x
, 0), SImode
))
8108 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8110 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8115 /* We need to find and carefully transform any SYMBOL and LABEL
8116 references; so go back to the original address expression. */
8117 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8119 if (new_x
!= orig_x
)
8126 /* Return TRUE if X contains any TLS symbol references. */
8129 arm_tls_referenced_p (rtx x
)
8131 if (! TARGET_HAVE_TLS
)
8134 subrtx_iterator::array_type array
;
8135 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8137 const_rtx x
= *iter
;
8138 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8141 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8142 TLS offsets, not real symbol references. */
8143 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8144 iter
.skip_subrtxes ();
8149 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8151 On the ARM, allow any integer (invalid ones are removed later by insn
8152 patterns), nice doubles and symbol_refs which refer to the function's
8155 When generating pic allow anything. */
8158 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8160 return flag_pic
|| !label_mentioned_p (x
);
8164 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8166 return (CONST_INT_P (x
)
8167 || CONST_DOUBLE_P (x
)
8168 || CONSTANT_ADDRESS_P (x
)
8173 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8175 return (!arm_cannot_force_const_mem (mode
, x
)
8177 ? arm_legitimate_constant_p_1 (mode
, x
)
8178 : thumb_legitimate_constant_p (mode
, x
)));
8181 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8184 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8188 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8190 split_const (x
, &base
, &offset
);
8191 if (GET_CODE (base
) == SYMBOL_REF
8192 && !offset_within_block_p (base
, INTVAL (offset
)))
8195 return arm_tls_referenced_p (x
);
/* TRUE if X is a register rtx, or a SUBREG wrapping a register rtx.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* Strip a SUBREG wrapper, yielding the underlying register rtx.  Only
   valid when REG_OR_SUBREG_REG (X) holds.  */
#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
8206 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8208 machine_mode mode
= GET_MODE (x
);
8217 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8224 return COSTS_N_INSNS (1);
8227 if (CONST_INT_P (XEXP (x
, 1)))
8230 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8237 return COSTS_N_INSNS (2) + cycles
;
8239 return COSTS_N_INSNS (1) + 16;
8242 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8244 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8245 return (COSTS_N_INSNS (words
)
8246 + 4 * ((MEM_P (SET_SRC (x
)))
8247 + MEM_P (SET_DEST (x
))));
8252 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
8254 if (thumb_shiftable_const (INTVAL (x
)))
8255 return COSTS_N_INSNS (2);
8256 return COSTS_N_INSNS (3);
8258 else if ((outer
== PLUS
|| outer
== COMPARE
)
8259 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8261 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8262 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8263 return COSTS_N_INSNS (1);
8264 else if (outer
== AND
)
8267 /* This duplicates the tests in the andsi3 expander. */
8268 for (i
= 9; i
<= 31; i
++)
8269 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
8270 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
8271 return COSTS_N_INSNS (2);
8273 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8274 || outer
== LSHIFTRT
)
8276 return COSTS_N_INSNS (2);
8282 return COSTS_N_INSNS (3);
8300 /* XXX another guess. */
8301 /* Memory costs quite a lot for the first word, but subsequent words
8302 load at the equivalent of a single insn each. */
8303 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8304 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8309 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8315 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8316 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8322 return total
+ COSTS_N_INSNS (1);
8324 /* Assume a two-shift sequence. Increase the cost slightly so
8325 we prefer actual shifts over an extend operation. */
8326 return total
+ 1 + COSTS_N_INSNS (2);
8334 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
8336 machine_mode mode
= GET_MODE (x
);
8337 enum rtx_code subcode
;
8339 enum rtx_code code
= GET_CODE (x
);
8345 /* Memory costs quite a lot for the first word, but subsequent words
8346 load at the equivalent of a single insn each. */
8347 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8354 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
8355 *total
= COSTS_N_INSNS (2);
8356 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
8357 *total
= COSTS_N_INSNS (4);
8359 *total
= COSTS_N_INSNS (20);
8363 if (REG_P (XEXP (x
, 1)))
8364 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
8365 else if (!CONST_INT_P (XEXP (x
, 1)))
8366 *total
= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8372 *total
+= COSTS_N_INSNS (4);
8377 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
8378 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8381 *total
+= COSTS_N_INSNS (3);
8385 *total
+= COSTS_N_INSNS (1);
8386 /* Increase the cost of complex shifts because they aren't any faster,
8387 and reduce dual issue opportunities. */
8388 if (arm_tune_cortex_a9
8389 && outer
!= SET
&& !CONST_INT_P (XEXP (x
, 1)))
8397 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8398 if (CONST_INT_P (XEXP (x
, 0))
8399 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8401 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8405 if (CONST_INT_P (XEXP (x
, 1))
8406 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8408 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8415 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8417 if (TARGET_HARD_FLOAT
8419 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8421 *total
= COSTS_N_INSNS (1);
8422 if (CONST_DOUBLE_P (XEXP (x
, 0))
8423 && arm_const_double_rtx (XEXP (x
, 0)))
8425 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8429 if (CONST_DOUBLE_P (XEXP (x
, 1))
8430 && arm_const_double_rtx (XEXP (x
, 1)))
8432 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8438 *total
= COSTS_N_INSNS (20);
8442 *total
= COSTS_N_INSNS (1);
8443 if (CONST_INT_P (XEXP (x
, 0))
8444 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
8446 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8450 subcode
= GET_CODE (XEXP (x
, 1));
8451 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8452 || subcode
== LSHIFTRT
8453 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8455 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8456 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, subcode
, 0, speed
);
8460 /* A shift as a part of RSB costs no more than RSB itself. */
8461 if (GET_CODE (XEXP (x
, 0)) == MULT
8462 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8464 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
, 0, speed
);
8465 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8470 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
8472 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8473 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, subcode
, 0, speed
);
8477 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
8478 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
8480 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
8482 if (REG_P (XEXP (XEXP (x
, 1), 0))
8483 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
8484 *total
+= COSTS_N_INSNS (1);
8492 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
8493 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8494 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8496 *total
= COSTS_N_INSNS (1);
8497 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
8498 GET_CODE (XEXP (x
, 0)), 0, speed
);
8499 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8503 /* MLA: All arguments must be registers. We filter out
8504 multiplication by a power of two, so that we fall down into
8506 if (GET_CODE (XEXP (x
, 0)) == MULT
8507 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8509 /* The cost comes from the cost of the multiply. */
8513 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8515 if (TARGET_HARD_FLOAT
8517 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8519 *total
= COSTS_N_INSNS (1);
8520 if (CONST_DOUBLE_P (XEXP (x
, 1))
8521 && arm_const_double_rtx (XEXP (x
, 1)))
8523 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8530 *total
= COSTS_N_INSNS (20);
8534 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
8535 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
8537 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), mode
, code
,
8539 if (REG_P (XEXP (XEXP (x
, 0), 0))
8540 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
8541 *total
+= COSTS_N_INSNS (1);
8547 case AND
: case XOR
: case IOR
:
8549 /* Normally the frame registers will be spilt into reg+const during
8550 reload, so it is a bad idea to combine them with other instructions,
8551 since then they might not be moved outside of loops. As a compromise
8552 we allow integration with ops that have a constant as their second
8554 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
8555 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
8556 && !CONST_INT_P (XEXP (x
, 1)))
8557 *total
= COSTS_N_INSNS (1);
8561 *total
+= COSTS_N_INSNS (2);
8562 if (CONST_INT_P (XEXP (x
, 1))
8563 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8565 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8572 *total
+= COSTS_N_INSNS (1);
8573 if (CONST_INT_P (XEXP (x
, 1))
8574 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8576 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8579 subcode
= GET_CODE (XEXP (x
, 0));
8580 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8581 || subcode
== LSHIFTRT
8582 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8584 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8585 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8590 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8592 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8593 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8597 if (subcode
== UMIN
|| subcode
== UMAX
8598 || subcode
== SMIN
|| subcode
== SMAX
)
8600 *total
= COSTS_N_INSNS (3);
8607 /* This should have been handled by the CPU specific routines. */
8611 if (arm_arch3m
&& mode
== SImode
8612 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8613 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8614 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
8615 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
8616 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8617 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
8619 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, LSHIFTRT
,
8623 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8627 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8629 if (TARGET_HARD_FLOAT
8631 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8633 *total
= COSTS_N_INSNS (1);
8636 *total
= COSTS_N_INSNS (2);
8642 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
8643 if (mode
== SImode
&& code
== NOT
)
8645 subcode
= GET_CODE (XEXP (x
, 0));
8646 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8647 || subcode
== LSHIFTRT
8648 || subcode
== ROTATE
|| subcode
== ROTATERT
8650 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
8652 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
,
8654 /* Register shifts cost an extra cycle. */
8655 if (!CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
8656 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
8666 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8668 *total
= COSTS_N_INSNS (4);
8672 operand
= XEXP (x
, 0);
8674 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
8675 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
8676 && REG_P (XEXP (operand
, 0))
8677 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
8678 *total
+= COSTS_N_INSNS (1);
8679 *total
+= rtx_cost (XEXP (x
, 1), VOIDmode
, code
, 1, speed
);
8680 *total
+= rtx_cost (XEXP (x
, 2), VOIDmode
, code
, 2, speed
);
8684 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8686 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
8693 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8694 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8696 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
8703 if ((!REG_P (XEXP (x
, 0)) || REGNO (XEXP (x
, 0)) != CC_REGNUM
)
8704 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
8706 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
8727 /* SCC insns. In the case where the comparison has already been
8728 performed, then they cost 2 instructions. Otherwise they need
8729 an additional comparison before them. */
8730 *total
= COSTS_N_INSNS (2);
8731 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8738 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
)
8744 *total
+= COSTS_N_INSNS (1);
8745 if (CONST_INT_P (XEXP (x
, 1))
8746 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
8748 *total
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed
);
8752 subcode
= GET_CODE (XEXP (x
, 0));
8753 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
8754 || subcode
== LSHIFTRT
8755 || subcode
== ROTATE
|| subcode
== ROTATERT
)
8757 mode
= GET_MODE (XEXP (x
, 0));
8758 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8759 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8764 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
8766 mode
= GET_MODE (XEXP (x
, 0));
8767 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8768 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, subcode
, 0, speed
);
8778 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8779 if (!CONST_INT_P (XEXP (x
, 1))
8780 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
8781 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed
);
8785 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8787 if (TARGET_HARD_FLOAT
8789 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8791 *total
= COSTS_N_INSNS (1);
8794 *total
= COSTS_N_INSNS (20);
8797 *total
= COSTS_N_INSNS (1);
8799 *total
+= COSTS_N_INSNS (3);
8805 if (GET_MODE_CLASS (mode
) == MODE_INT
)
8807 rtx op
= XEXP (x
, 0);
8808 machine_mode opmode
= GET_MODE (op
);
8811 *total
+= COSTS_N_INSNS (1);
8813 if (opmode
!= SImode
)
8817 /* If !arm_arch4, we use one of the extendhisi2_mem
8818 or movhi_bytes patterns for HImode. For a QImode
8819 sign extension, we first zero-extend from memory
8820 and then perform a shift sequence. */
8821 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
8822 *total
+= COSTS_N_INSNS (2);
8825 *total
+= COSTS_N_INSNS (1);
8827 /* We don't have the necessary insn, so we need to perform some
8829 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
8830 /* An and with constant 255. */
8831 *total
+= COSTS_N_INSNS (1);
8833 /* A shift sequence. Increase costs slightly to avoid
8834 combining two shifts into an extend operation. */
8835 *total
+= COSTS_N_INSNS (2) + 1;
8841 switch (GET_MODE (XEXP (x
, 0)))
8848 *total
= COSTS_N_INSNS (1);
8858 mode
= GET_MODE (XEXP (x
, 0));
8859 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8863 if (const_ok_for_arm (INTVAL (x
))
8864 || const_ok_for_arm (~INTVAL (x
)))
8865 *total
= COSTS_N_INSNS (1);
8867 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
8868 INTVAL (x
), NULL_RTX
,
8875 *total
= COSTS_N_INSNS (3);
8879 *total
= COSTS_N_INSNS (1);
8883 *total
= COSTS_N_INSNS (1);
8884 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
8888 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
8889 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8890 *total
= COSTS_N_INSNS (1);
8892 *total
= COSTS_N_INSNS (4);
8896 /* The vec_extract patterns accept memory operands that require an
8897 address reload. Account for the cost of that reload to give the
8898 auto-inc-dec pass an incentive to try to replace them. */
8899 if (TARGET_NEON
&& MEM_P (SET_DEST (x
))
8900 && GET_CODE (SET_SRC (x
)) == VEC_SELECT
)
8902 mode
= GET_MODE (SET_DEST (x
));
8903 *total
= rtx_cost (SET_DEST (x
), mode
, code
, 0, speed
);
8904 if (!neon_vector_mem_operand (SET_DEST (x
), 2, true))
8905 *total
+= COSTS_N_INSNS (1);
8908 /* Likewise for the vec_set patterns. */
8909 if (TARGET_NEON
&& GET_CODE (SET_SRC (x
)) == VEC_MERGE
8910 && GET_CODE (XEXP (SET_SRC (x
), 0)) == VEC_DUPLICATE
8911 && MEM_P (XEXP (XEXP (SET_SRC (x
), 0), 0)))
8913 rtx mem
= XEXP (XEXP (SET_SRC (x
), 0), 0);
8914 mode
= GET_MODE (SET_DEST (x
));
8915 *total
= rtx_cost (mem
, mode
, code
, 0, speed
);
8916 if (!neon_vector_mem_operand (mem
, 2, true))
8917 *total
+= COSTS_N_INSNS (1);
8923 /* We cost this as high as our memory costs to allow this to
8924 be hoisted from loops. */
8925 if (XINT (x
, 1) == UNSPEC_PIC_UNIFIED
)
8927 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
8933 && TARGET_HARD_FLOAT
8935 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
8936 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
8937 *total
= COSTS_N_INSNS (1);
8939 *total
= COSTS_N_INSNS (4);
8943 *total
= COSTS_N_INSNS (4);
8948 /* Estimates the size cost of thumb1 instructions.
8949 For now most of the code is copied from thumb1_rtx_costs. We need more
8950 fine grain tuning when we have more related test cases. */
8952 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8954 machine_mode mode
= GET_MODE (x
);
8963 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8967 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8968 defined by RTL expansion, especially for the expansion of
8970 if ((GET_CODE (XEXP (x
, 0)) == MULT
8971 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8972 || (GET_CODE (XEXP (x
, 1)) == MULT
8973 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8974 return COSTS_N_INSNS (2);
8975 /* On purpose fall through for normal RTX. */
8979 return COSTS_N_INSNS (1);
8982 if (CONST_INT_P (XEXP (x
, 1)))
8984 /* Thumb1 mul instruction can't operate on const. We must Load it
8985 into a register first. */
8986 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8987 /* For the targets which have a very small and high-latency multiply
8988 unit, we prefer to synthesize the mult with up to 5 instructions,
8989 giving a good balance between size and performance. */
8990 if (arm_arch6m
&& arm_m_profile_small_mul
)
8991 return COSTS_N_INSNS (5);
8993 return COSTS_N_INSNS (1) + const_size
;
8995 return COSTS_N_INSNS (1);
8998 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9000 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9001 return COSTS_N_INSNS (words
)
9002 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x
))
9003 || satisfies_constraint_K (SET_SRC (x
))
9004 /* thumb1_movdi_insn. */
9005 || ((words
> 1) && MEM_P (SET_SRC (x
))));
9010 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
9011 return COSTS_N_INSNS (1);
9012 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9013 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9014 return COSTS_N_INSNS (2);
9015 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9016 if (thumb_shiftable_const (INTVAL (x
)))
9017 return COSTS_N_INSNS (2);
9018 return COSTS_N_INSNS (3);
9020 else if ((outer
== PLUS
|| outer
== COMPARE
)
9021 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9023 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9024 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9025 return COSTS_N_INSNS (1);
9026 else if (outer
== AND
)
9029 /* This duplicates the tests in the andsi3 expander. */
9030 for (i
= 9; i
<= 31; i
++)
9031 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
9032 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
9033 return COSTS_N_INSNS (2);
9035 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9036 || outer
== LSHIFTRT
)
9038 return COSTS_N_INSNS (2);
9044 return COSTS_N_INSNS (3);
9058 return COSTS_N_INSNS (1);
9061 return (COSTS_N_INSNS (1)
9063 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9064 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9065 ? COSTS_N_INSNS (1) : 0));
9069 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9074 /* XXX still guessing. */
9075 switch (GET_MODE (XEXP (x
, 0)))
9078 return (1 + (mode
== DImode
? 4 : 0)
9079 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9082 return (4 + (mode
== DImode
? 4 : 0)
9083 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9086 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9097 /* RTX costs when optimizing for size. */
9099 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9102 machine_mode mode
= GET_MODE (x
);
9105 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9109 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9113 /* A memory access costs 1 insn if the mode is small, or the address is
9114 a single register, otherwise it costs one insn per word. */
9115 if (REG_P (XEXP (x
, 0)))
9116 *total
= COSTS_N_INSNS (1);
9118 && GET_CODE (XEXP (x
, 0)) == PLUS
9119 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9120 /* This will be split into two instructions.
9121 See arm.md:calculate_pic_address. */
9122 *total
= COSTS_N_INSNS (2);
9124 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9131 /* Needs a libcall, so it costs about this. */
9132 *total
= COSTS_N_INSNS (2);
9136 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9138 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), mode
, code
,
9147 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9149 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), mode
, code
,
9153 else if (mode
== SImode
)
9155 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), mode
, code
,
9157 /* Slightly disparage register shifts, but not by much. */
9158 if (!CONST_INT_P (XEXP (x
, 1)))
9159 *total
+= 1 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, false);
9163 /* Needs a libcall. */
9164 *total
= COSTS_N_INSNS (2);
9168 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9169 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9171 *total
= COSTS_N_INSNS (1);
9177 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
9178 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
9180 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
9181 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
9182 || subcode1
== ROTATE
|| subcode1
== ROTATERT
9183 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
9184 || subcode1
== ASHIFTRT
)
9186 /* It's just the cost of the two operands. */
9191 *total
= COSTS_N_INSNS (1);
9195 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9199 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9200 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9202 *total
= COSTS_N_INSNS (1);
9206 /* A shift as a part of ADD costs nothing. */
9207 if (GET_CODE (XEXP (x
, 0)) == MULT
9208 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
9210 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
9211 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
, 0, false);
9212 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, false);
9217 case AND
: case XOR
: case IOR
:
9220 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9222 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
9223 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
9224 || (code
== AND
&& subcode
== NOT
))
9226 /* It's just the cost of the two operands. */
9232 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9236 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9240 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9241 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9243 *total
= COSTS_N_INSNS (1);
9249 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9258 if (cc_register (XEXP (x
, 0), VOIDmode
))
9261 *total
= COSTS_N_INSNS (1);
9265 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9266 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9267 *total
= COSTS_N_INSNS (1);
9269 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
9274 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
9277 if (const_ok_for_arm (INTVAL (x
)))
9278 /* A multiplication by a constant requires another instruction
9279 to load the constant to a register. */
9280 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
9282 else if (const_ok_for_arm (~INTVAL (x
)))
9283 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
9284 else if (const_ok_for_arm (-INTVAL (x
)))
9286 if (outer_code
== COMPARE
|| outer_code
== PLUS
9287 || outer_code
== MINUS
)
9290 *total
= COSTS_N_INSNS (1);
9293 *total
= COSTS_N_INSNS (2);
9299 *total
= COSTS_N_INSNS (2);
9303 *total
= COSTS_N_INSNS (4);
9308 && TARGET_HARD_FLOAT
9309 && outer_code
== SET
9310 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
9311 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
9312 *total
= COSTS_N_INSNS (1);
9314 *total
= COSTS_N_INSNS (4);
9319 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9320 cost of these slightly. */
9321 *total
= COSTS_N_INSNS (1) + 1;
9328 if (mode
!= VOIDmode
)
9329 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9331 *total
= COSTS_N_INSNS (4); /* How knows? */
9336 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9337 operand, then return the operand that is being shifted. If the shift
9338 is not by a constant, then set SHIFT_REG to point to the operand.
9339 Return NULL if OP is not a shifter operand. */
9341 shifter_op_p (rtx op
, rtx
*shift_reg
)
9343 enum rtx_code code
= GET_CODE (op
);
9345 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9346 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9347 return XEXP (op
, 0);
9348 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9349 return XEXP (op
, 0);
9350 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9351 || code
== ASHIFTRT
)
9353 if (!CONST_INT_P (XEXP (op
, 1)))
9354 *shift_reg
= XEXP (op
, 1);
9355 return XEXP (op
, 0);
9362 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9364 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9365 rtx_code code
= GET_CODE (x
);
9366 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9368 switch (XINT (x
, 1))
9370 case UNSPEC_UNALIGNED_LOAD
:
9371 /* We can only do unaligned loads into the integer unit, and we can't
9373 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9375 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9376 + extra_cost
->ldst
.load_unaligned
);
9379 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9380 ADDR_SPACE_GENERIC
, speed_p
);
9384 case UNSPEC_UNALIGNED_STORE
:
9385 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9387 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9388 + extra_cost
->ldst
.store_unaligned
);
9390 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9392 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9393 ADDR_SPACE_GENERIC
, speed_p
);
9404 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9408 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.
   NOTE: expands a variable `speed_p' that must be in scope at each use
   site.  The argument is parenthesized so that compound expressions
   (e.g. LIBCALL_COST (a + b)) expand with the intended precedence.  */
#define LIBCALL_COST(N) COSTS_N_INSNS ((N) + (speed_p ? 18 : 2))
9418 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9421 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9422 if (shift_op != NULL \
9423 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9428 *cost += extra_cost->alu.arith_shift_reg; \
9429 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9430 ASHIFT, 1, speed_p); \
9433 *cost += extra_cost->alu.arith_shift; \
9435 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9436 ASHIFT, 0, speed_p) \
9437 + rtx_cost (XEXP (x, 1 - IDX), \
9438 GET_MODE (shift_op), \
9445 /* RTX costs. Make an estimate of the cost of executing the operation
9446 X, which is contained with an operation with code OUTER_CODE.
9447 SPEED_P indicates whether the cost desired is the performance cost,
9448 or the size cost. The estimate is stored in COST and the return
9449 value is TRUE if the cost calculation is final, or FALSE if the
9450 caller should recurse through the operands of X to add additional
9453 We currently make no attempt to model the size savings of Thumb-2
9454 16-bit instructions. At the normal points in compilation where
9455 this code is called we have no measure of whether the condition
9456 flags are live or not, and thus no realistic way to determine what
9457 the size will eventually be. */
9459 arm_new_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9460 const struct cpu_cost_table
*extra_cost
,
9461 int *cost
, bool speed_p
)
9463 machine_mode mode
= GET_MODE (x
);
9465 *cost
= COSTS_N_INSNS (1);
9470 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9472 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9480 /* SET RTXs don't have a mode so we get it from the destination. */
9481 mode
= GET_MODE (SET_DEST (x
));
9483 if (REG_P (SET_SRC (x
))
9484 && REG_P (SET_DEST (x
)))
9486 /* Assume that most copies can be done with a single insn,
9487 unless we don't have HW FP, in which case everything
9488 larger than word mode will require two insns. */
9489 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9490 && GET_MODE_SIZE (mode
) > 4)
9493 /* Conditional register moves can be encoded
9494 in 16 bits in Thumb mode. */
9495 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9501 if (CONST_INT_P (SET_SRC (x
)))
9503 /* Handle CONST_INT here, since the value doesn't have a mode
9504 and we would otherwise be unable to work out the true cost. */
9505 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9508 /* Slightly lower the cost of setting a core reg to a constant.
9509 This helps break up chains and allows for better scheduling. */
9510 if (REG_P (SET_DEST (x
))
9511 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9514 /* Immediate moves with an immediate in the range [0, 255] can be
9515 encoded in 16 bits in Thumb mode. */
9516 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9517 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9519 goto const_int_cost
;
9525 /* A memory access costs 1 insn if the mode is small, or the address is
9526 a single register, otherwise it costs one insn per word. */
9527 if (REG_P (XEXP (x
, 0)))
9528 *cost
= COSTS_N_INSNS (1);
9530 && GET_CODE (XEXP (x
, 0)) == PLUS
9531 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9532 /* This will be split into two instructions.
9533 See arm.md:calculate_pic_address. */
9534 *cost
= COSTS_N_INSNS (2);
9536 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9538 /* For speed optimizations, add the costs of the address and
9539 accessing memory. */
9542 *cost
+= (extra_cost
->ldst
.load
9543 + arm_address_cost (XEXP (x
, 0), mode
,
9544 ADDR_SPACE_GENERIC
, speed_p
));
9546 *cost
+= extra_cost
->ldst
.load
;
9552 /* Calculations of LDM costs are complex. We assume an initial cost
9553 (ldm_1st) which will load the number of registers mentioned in
9554 ldm_regs_per_insn_1st registers; then each additional
9555 ldm_regs_per_insn_subsequent registers cost one more insn. The
9556 formula for N regs is thus:
9558 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9559 + ldm_regs_per_insn_subsequent - 1)
9560 / ldm_regs_per_insn_subsequent).
9562 Additional costs may also be added for addressing. A similar
9563 formula is used for STM. */
9565 bool is_ldm
= load_multiple_operation (x
, SImode
);
9566 bool is_stm
= store_multiple_operation (x
, SImode
);
9568 if (is_ldm
|| is_stm
)
9572 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9573 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9574 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9575 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9576 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9577 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9578 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9580 *cost
+= regs_per_insn_1st
9581 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9582 + regs_per_insn_sub
- 1)
9583 / regs_per_insn_sub
);
9592 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9593 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9594 *cost
+= COSTS_N_INSNS (speed_p
9595 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9596 else if (mode
== SImode
&& TARGET_IDIV
)
9597 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9599 *cost
= LIBCALL_COST (2);
9600 return false; /* All arguments must be in registers. */
9603 /* MOD by a power of 2 can be expanded as:
9605 and r0, r0, #(n - 1)
9606 and r1, r1, #(n - 1)
9607 rsbpl r0, r1, #0. */
9608 if (CONST_INT_P (XEXP (x
, 1))
9609 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9612 *cost
+= COSTS_N_INSNS (3);
9615 *cost
+= 2 * extra_cost
->alu
.logical
9616 + extra_cost
->alu
.arith
;
9622 *cost
= LIBCALL_COST (2);
9623 return false; /* All arguments must be in registers. */
9626 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9628 *cost
+= (COSTS_N_INSNS (1)
9629 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9631 *cost
+= extra_cost
->alu
.shift_reg
;
9639 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9641 *cost
+= (COSTS_N_INSNS (2)
9642 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9644 *cost
+= 2 * extra_cost
->alu
.shift
;
9647 else if (mode
== SImode
)
9649 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9650 /* Slightly disparage register shifts at -Os, but not by much. */
9651 if (!CONST_INT_P (XEXP (x
, 1)))
9652 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9653 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9656 else if (GET_MODE_CLASS (mode
) == MODE_INT
9657 && GET_MODE_SIZE (mode
) < 4)
9661 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9662 /* Slightly disparage register shifts at -Os, but not by
9664 if (!CONST_INT_P (XEXP (x
, 1)))
9665 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9666 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9668 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9670 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9672 /* Can use SBFX/UBFX. */
9674 *cost
+= extra_cost
->alu
.bfx
;
9675 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9679 *cost
+= COSTS_N_INSNS (1);
9680 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9683 if (CONST_INT_P (XEXP (x
, 1)))
9684 *cost
+= 2 * extra_cost
->alu
.shift
;
9686 *cost
+= (extra_cost
->alu
.shift
9687 + extra_cost
->alu
.shift_reg
);
9690 /* Slightly disparage register shifts. */
9691 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9696 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9697 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9700 if (CONST_INT_P (XEXP (x
, 1)))
9701 *cost
+= (2 * extra_cost
->alu
.shift
9702 + extra_cost
->alu
.log_shift
);
9704 *cost
+= (extra_cost
->alu
.shift
9705 + extra_cost
->alu
.shift_reg
9706 + extra_cost
->alu
.log_shift_reg
);
9712 *cost
= LIBCALL_COST (2);
9721 *cost
+= extra_cost
->alu
.rev
;
9728 /* No rev instruction available. Look at arm_legacy_rev
9729 and thumb_legacy_rev for the form of RTL used then. */
9732 *cost
+= COSTS_N_INSNS (9);
9736 *cost
+= 6 * extra_cost
->alu
.shift
;
9737 *cost
+= 3 * extra_cost
->alu
.logical
;
9742 *cost
+= COSTS_N_INSNS (4);
9746 *cost
+= 2 * extra_cost
->alu
.shift
;
9747 *cost
+= extra_cost
->alu
.arith_shift
;
9748 *cost
+= 2 * extra_cost
->alu
.logical
;
9756 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9757 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9759 if (GET_CODE (XEXP (x
, 0)) == MULT
9760 || GET_CODE (XEXP (x
, 1)) == MULT
)
9762 rtx mul_op0
, mul_op1
, sub_op
;
9765 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9767 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9769 mul_op0
= XEXP (XEXP (x
, 0), 0);
9770 mul_op1
= XEXP (XEXP (x
, 0), 1);
9771 sub_op
= XEXP (x
, 1);
9775 mul_op0
= XEXP (XEXP (x
, 1), 0);
9776 mul_op1
= XEXP (XEXP (x
, 1), 1);
9777 sub_op
= XEXP (x
, 0);
9780 /* The first operand of the multiply may be optionally
9782 if (GET_CODE (mul_op0
) == NEG
)
9783 mul_op0
= XEXP (mul_op0
, 0);
9785 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9786 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9787 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9793 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9799 rtx shift_by_reg
= NULL
;
9803 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9804 if (shift_op
== NULL
)
9806 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9807 non_shift_op
= XEXP (x
, 0);
9810 non_shift_op
= XEXP (x
, 1);
9812 if (shift_op
!= NULL
)
9814 if (shift_by_reg
!= NULL
)
9817 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9818 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9821 *cost
+= extra_cost
->alu
.arith_shift
;
9823 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9824 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9829 && GET_CODE (XEXP (x
, 1)) == MULT
)
9833 *cost
+= extra_cost
->mult
[0].add
;
9834 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9835 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9836 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9840 if (CONST_INT_P (XEXP (x
, 0)))
9842 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9843 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9845 *cost
= COSTS_N_INSNS (insns
);
9847 *cost
+= insns
* extra_cost
->alu
.arith
;
9848 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9852 *cost
+= extra_cost
->alu
.arith
;
9857 if (GET_MODE_CLASS (mode
) == MODE_INT
9858 && GET_MODE_SIZE (mode
) < 4)
9860 rtx shift_op
, shift_reg
;
9863 /* We check both sides of the MINUS for shifter operands since,
9864 unlike PLUS, it's not commutative. */
9866 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9867 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9869 /* Slightly disparage, as we might need to widen the result. */
9872 *cost
+= extra_cost
->alu
.arith
;
9874 if (CONST_INT_P (XEXP (x
, 0)))
9876 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9885 *cost
+= COSTS_N_INSNS (1);
9887 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9889 rtx op1
= XEXP (x
, 1);
9892 *cost
+= 2 * extra_cost
->alu
.arith
;
9894 if (GET_CODE (op1
) == ZERO_EXTEND
)
9895 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9898 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9899 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9903 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9906 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9907 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9909 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9912 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9913 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9916 *cost
+= (extra_cost
->alu
.arith
9917 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9918 ? extra_cost
->alu
.arith
9919 : extra_cost
->alu
.arith_shift
));
9920 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9921 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9922 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9927 *cost
+= 2 * extra_cost
->alu
.arith
;
9933 *cost
= LIBCALL_COST (2);
9937 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9938 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9940 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9942 rtx mul_op0
, mul_op1
, add_op
;
9945 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9947 mul_op0
= XEXP (XEXP (x
, 0), 0);
9948 mul_op1
= XEXP (XEXP (x
, 0), 1);
9949 add_op
= XEXP (x
, 1);
9951 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9952 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9953 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9959 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9962 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9964 *cost
= LIBCALL_COST (2);
9968 /* Narrow modes can be synthesized in SImode, but the range
9969 of useful sub-operations is limited. Check for shift operations
9970 on one of the operands. Only left shifts can be used in the
9972 if (GET_MODE_CLASS (mode
) == MODE_INT
9973 && GET_MODE_SIZE (mode
) < 4)
9975 rtx shift_op
, shift_reg
;
9978 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9980 if (CONST_INT_P (XEXP (x
, 1)))
9982 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9983 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9985 *cost
= COSTS_N_INSNS (insns
);
9987 *cost
+= insns
* extra_cost
->alu
.arith
;
9988 /* Slightly penalize a narrow operation as the result may
9990 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9994 /* Slightly penalize a narrow operation as the result may
9998 *cost
+= extra_cost
->alu
.arith
;
10003 if (mode
== SImode
)
10005 rtx shift_op
, shift_reg
;
10007 if (TARGET_INT_SIMD
10008 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10009 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10011 /* UXTA[BH] or SXTA[BH]. */
10013 *cost
+= extra_cost
->alu
.extend_arith
;
10014 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10016 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
10021 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10022 if (shift_op
!= NULL
)
10027 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10028 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10031 *cost
+= extra_cost
->alu
.arith_shift
;
10033 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10034 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10037 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10039 rtx mul_op
= XEXP (x
, 0);
10041 if (TARGET_DSP_MULTIPLY
10042 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
10043 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10044 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10045 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10046 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
10047 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
10048 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
10049 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
10050 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10051 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10052 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10053 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
10056 /* SMLA[BT][BT]. */
10058 *cost
+= extra_cost
->mult
[0].extend_add
;
10059 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
10060 SIGN_EXTEND
, 0, speed_p
)
10061 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
10062 SIGN_EXTEND
, 0, speed_p
)
10063 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10068 *cost
+= extra_cost
->mult
[0].add
;
10069 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
10070 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
10071 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10074 if (CONST_INT_P (XEXP (x
, 1)))
10076 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10077 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10079 *cost
= COSTS_N_INSNS (insns
);
10081 *cost
+= insns
* extra_cost
->alu
.arith
;
10082 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10086 *cost
+= extra_cost
->alu
.arith
;
10091 if (mode
== DImode
)
10094 && GET_CODE (XEXP (x
, 0)) == MULT
10095 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10096 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10097 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10098 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10101 *cost
+= extra_cost
->mult
[1].extend_add
;
10102 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10103 ZERO_EXTEND
, 0, speed_p
)
10104 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
10105 ZERO_EXTEND
, 0, speed_p
)
10106 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10110 *cost
+= COSTS_N_INSNS (1);
10112 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10113 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10116 *cost
+= (extra_cost
->alu
.arith
10117 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10118 ? extra_cost
->alu
.arith
10119 : extra_cost
->alu
.arith_shift
));
10121 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10123 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10128 *cost
+= 2 * extra_cost
->alu
.arith
;
10133 *cost
= LIBCALL_COST (2);
10136 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
10139 *cost
+= extra_cost
->alu
.rev
;
10143 /* Fall through. */
10144 case AND
: case XOR
:
10145 if (mode
== SImode
)
10147 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
10148 rtx op0
= XEXP (x
, 0);
10149 rtx shift_op
, shift_reg
;
10153 || (code
== IOR
&& TARGET_THUMB2
)))
10154 op0
= XEXP (op0
, 0);
10157 shift_op
= shifter_op_p (op0
, &shift_reg
);
10158 if (shift_op
!= NULL
)
10163 *cost
+= extra_cost
->alu
.log_shift_reg
;
10164 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10167 *cost
+= extra_cost
->alu
.log_shift
;
10169 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10170 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10174 if (CONST_INT_P (XEXP (x
, 1)))
10176 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
10177 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10180 *cost
= COSTS_N_INSNS (insns
);
10182 *cost
+= insns
* extra_cost
->alu
.logical
;
10183 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
10188 *cost
+= extra_cost
->alu
.logical
;
10189 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
10190 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10194 if (mode
== DImode
)
10196 rtx op0
= XEXP (x
, 0);
10197 enum rtx_code subcode
= GET_CODE (op0
);
10199 *cost
+= COSTS_N_INSNS (1);
10203 || (code
== IOR
&& TARGET_THUMB2
)))
10204 op0
= XEXP (op0
, 0);
10206 if (GET_CODE (op0
) == ZERO_EXTEND
)
10209 *cost
+= 2 * extra_cost
->alu
.logical
;
10211 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
10213 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10216 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10219 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10221 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
10223 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10228 *cost
+= 2 * extra_cost
->alu
.logical
;
10234 *cost
= LIBCALL_COST (2);
10238 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10239 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10241 rtx op0
= XEXP (x
, 0);
10243 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10244 op0
= XEXP (op0
, 0);
10247 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10249 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10250 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10253 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10255 *cost
= LIBCALL_COST (2);
10259 if (mode
== SImode
)
10261 if (TARGET_DSP_MULTIPLY
10262 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10263 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10264 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10265 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10266 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10267 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10268 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10269 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10270 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10271 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10272 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10273 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10276 /* SMUL[TB][TB]. */
10278 *cost
+= extra_cost
->mult
[0].extend
;
10279 *cost
+= rtx_cost (XEXP (x
, 0), mode
, SIGN_EXTEND
, 0, speed_p
);
10280 *cost
+= rtx_cost (XEXP (x
, 1), mode
, SIGN_EXTEND
, 1, speed_p
);
10284 *cost
+= extra_cost
->mult
[0].simple
;
10288 if (mode
== DImode
)
10291 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10292 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10293 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10294 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10297 *cost
+= extra_cost
->mult
[1].extend
;
10298 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10299 ZERO_EXTEND
, 0, speed_p
)
10300 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10301 ZERO_EXTEND
, 0, speed_p
));
10305 *cost
= LIBCALL_COST (2);
10310 *cost
= LIBCALL_COST (2);
10314 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10315 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10317 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10320 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10325 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10329 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10331 *cost
= LIBCALL_COST (1);
10335 if (mode
== SImode
)
10337 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10339 *cost
+= COSTS_N_INSNS (1);
10340 /* Assume the non-flag-changing variant. */
10342 *cost
+= (extra_cost
->alu
.log_shift
10343 + extra_cost
->alu
.arith_shift
);
10344 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10348 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10349 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10351 *cost
+= COSTS_N_INSNS (1);
10352 /* No extra cost for MOV imm and MVN imm. */
10353 /* If the comparison op is using the flags, there's no further
10354 cost, otherwise we need to add the cost of the comparison. */
10355 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10356 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10357 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10359 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10360 *cost
+= (COSTS_N_INSNS (1)
10361 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10363 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10366 *cost
+= extra_cost
->alu
.arith
;
10372 *cost
+= extra_cost
->alu
.arith
;
10376 if (GET_MODE_CLASS (mode
) == MODE_INT
10377 && GET_MODE_SIZE (mode
) < 4)
10379 /* Slightly disparage, as we might need an extend operation. */
10382 *cost
+= extra_cost
->alu
.arith
;
10386 if (mode
== DImode
)
10388 *cost
+= COSTS_N_INSNS (1);
10390 *cost
+= 2 * extra_cost
->alu
.arith
;
10395 *cost
= LIBCALL_COST (1);
10399 if (mode
== SImode
)
10402 rtx shift_reg
= NULL
;
10404 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10408 if (shift_reg
!= NULL
)
10411 *cost
+= extra_cost
->alu
.log_shift_reg
;
10412 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10415 *cost
+= extra_cost
->alu
.log_shift
;
10416 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10421 *cost
+= extra_cost
->alu
.logical
;
10424 if (mode
== DImode
)
10426 *cost
+= COSTS_N_INSNS (1);
10432 *cost
+= LIBCALL_COST (1);
10437 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10439 *cost
+= COSTS_N_INSNS (3);
10442 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10443 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10445 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10446 /* Assume that if one arm of the if_then_else is a register,
10447 that it will be tied with the result and eliminate the
10448 conditional insn. */
10449 if (REG_P (XEXP (x
, 1)))
10451 else if (REG_P (XEXP (x
, 2)))
10457 if (extra_cost
->alu
.non_exec_costs_exec
)
10458 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10460 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10463 *cost
+= op1cost
+ op2cost
;
10469 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10473 machine_mode op0mode
;
10474 /* We'll mostly assume that the cost of a compare is the cost of the
10475 LHS. However, there are some notable exceptions. */
10477 /* Floating point compares are never done as side-effects. */
10478 op0mode
= GET_MODE (XEXP (x
, 0));
10479 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10480 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10483 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10485 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10487 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10493 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10495 *cost
= LIBCALL_COST (2);
10499 /* DImode compares normally take two insns. */
10500 if (op0mode
== DImode
)
10502 *cost
+= COSTS_N_INSNS (1);
10504 *cost
+= 2 * extra_cost
->alu
.arith
;
10508 if (op0mode
== SImode
)
10513 if (XEXP (x
, 1) == const0_rtx
10514 && !(REG_P (XEXP (x
, 0))
10515 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10516 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10518 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10520 /* Multiply operations that set the flags are often
10521 significantly more expensive. */
10523 && GET_CODE (XEXP (x
, 0)) == MULT
10524 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10525 *cost
+= extra_cost
->mult
[0].flag_setting
;
10528 && GET_CODE (XEXP (x
, 0)) == PLUS
10529 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10530 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10532 *cost
+= extra_cost
->mult
[0].flag_setting
;
10537 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10538 if (shift_op
!= NULL
)
10540 if (shift_reg
!= NULL
)
10542 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10545 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10548 *cost
+= extra_cost
->alu
.arith_shift
;
10549 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10550 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10555 *cost
+= extra_cost
->alu
.arith
;
10556 if (CONST_INT_P (XEXP (x
, 1))
10557 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10559 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10567 *cost
= LIBCALL_COST (2);
10590 if (outer_code
== SET
)
10592 /* Is it a store-flag operation? */
10593 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10594 && XEXP (x
, 1) == const0_rtx
)
10596 /* Thumb also needs an IT insn. */
10597 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10600 if (XEXP (x
, 1) == const0_rtx
)
10605 /* LSR Rd, Rn, #31. */
10607 *cost
+= extra_cost
->alu
.shift
;
10617 *cost
+= COSTS_N_INSNS (1);
10621 /* RSBS T1, Rn, Rn, LSR #31
10623 *cost
+= COSTS_N_INSNS (1);
10625 *cost
+= extra_cost
->alu
.arith_shift
;
10629 /* RSB Rd, Rn, Rn, ASR #1
10630 LSR Rd, Rd, #31. */
10631 *cost
+= COSTS_N_INSNS (1);
10633 *cost
+= (extra_cost
->alu
.arith_shift
10634 + extra_cost
->alu
.shift
);
10640 *cost
+= COSTS_N_INSNS (1);
10642 *cost
+= extra_cost
->alu
.shift
;
10646 /* Remaining cases are either meaningless or would take
10647 three insns anyway. */
10648 *cost
= COSTS_N_INSNS (3);
10651 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10656 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10657 if (CONST_INT_P (XEXP (x
, 1))
10658 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10660 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10667 /* Not directly inside a set. If it involves the condition code
10668 register it must be the condition for a branch, cond_exec or
10669 I_T_E operation. Since the comparison is performed elsewhere
10670 this is just the control part which has no additional
10672 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10673 && XEXP (x
, 1) == const0_rtx
)
10681 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10682 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10685 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10689 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10691 *cost
= LIBCALL_COST (1);
10695 if (mode
== SImode
)
10698 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10702 *cost
= LIBCALL_COST (1);
10706 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10707 && MEM_P (XEXP (x
, 0)))
10709 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10711 if (mode
== DImode
)
10712 *cost
+= COSTS_N_INSNS (1);
10717 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10718 *cost
+= extra_cost
->ldst
.load
;
10720 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10722 if (mode
== DImode
)
10723 *cost
+= extra_cost
->alu
.shift
;
10728 /* Widening from less than 32-bits requires an extend operation. */
10729 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10731 /* We have SXTB/SXTH. */
10732 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10734 *cost
+= extra_cost
->alu
.extend
;
10736 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10738 /* Needs two shifts. */
10739 *cost
+= COSTS_N_INSNS (1);
10740 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10742 *cost
+= 2 * extra_cost
->alu
.shift
;
10745 /* Widening beyond 32-bits requires one more insn. */
10746 if (mode
== DImode
)
10748 *cost
+= COSTS_N_INSNS (1);
10750 *cost
+= extra_cost
->alu
.shift
;
10757 || GET_MODE (XEXP (x
, 0)) == SImode
10758 || GET_MODE (XEXP (x
, 0)) == QImode
)
10759 && MEM_P (XEXP (x
, 0)))
10761 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10763 if (mode
== DImode
)
10764 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10769 /* Widening from less than 32-bits requires an extend operation. */
10770 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10772 /* UXTB can be a shorter instruction in Thumb2, but it might
10773 be slower than the AND Rd, Rn, #255 alternative. When
10774 optimizing for speed it should never be slower to use
10775 AND, and we don't really model 16-bit vs 32-bit insns
10778 *cost
+= extra_cost
->alu
.logical
;
10780 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10782 /* We have UXTB/UXTH. */
10783 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10785 *cost
+= extra_cost
->alu
.extend
;
10787 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10789 /* Needs two shifts. It's marginally preferable to use
10790 shifts rather than two BIC instructions as the second
10791 shift may merge with a subsequent insn as a shifter
10793 *cost
= COSTS_N_INSNS (2);
10794 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10796 *cost
+= 2 * extra_cost
->alu
.shift
;
10799 /* Widening beyond 32-bits requires one more insn. */
10800 if (mode
== DImode
)
10802 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10809 /* CONST_INT has no mode, so we cannot tell for sure how many
10810 insns are really going to be needed. The best we can do is
10811 look at the value passed. If it fits in SImode, then assume
10812 that's the mode it will be used for. Otherwise assume it
10813 will be used in DImode. */
10814 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10819 /* Avoid blowing up in arm_gen_constant (). */
10820 if (!(outer_code
== PLUS
10821 || outer_code
== AND
10822 || outer_code
== IOR
10823 || outer_code
== XOR
10824 || outer_code
== MINUS
))
10828 if (mode
== SImode
)
10830 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10831 INTVAL (x
), NULL
, NULL
,
10837 *cost
+= COSTS_N_INSNS (arm_gen_constant
10838 (outer_code
, SImode
, NULL
,
10839 trunc_int_for_mode (INTVAL (x
), SImode
),
10841 + arm_gen_constant (outer_code
, SImode
, NULL
,
10842 INTVAL (x
) >> 32, NULL
,
10854 if (arm_arch_thumb2
&& !flag_pic
)
10855 *cost
+= COSTS_N_INSNS (1);
10857 *cost
+= extra_cost
->ldst
.load
;
10860 *cost
+= COSTS_N_INSNS (1);
10864 *cost
+= COSTS_N_INSNS (1);
10866 *cost
+= extra_cost
->alu
.arith
;
10872 *cost
= COSTS_N_INSNS (4);
10877 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10878 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10880 if (vfp3_const_double_rtx (x
))
10883 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10889 if (mode
== DFmode
)
10890 *cost
+= extra_cost
->ldst
.loadd
;
10892 *cost
+= extra_cost
->ldst
.loadf
;
10895 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10899 *cost
= COSTS_N_INSNS (4);
10905 && TARGET_HARD_FLOAT
10906 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10907 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10908 *cost
= COSTS_N_INSNS (1);
10910 *cost
= COSTS_N_INSNS (4);
10915 /* When optimizing for size, we prefer constant pool entries to
10916 MOVW/MOVT pairs, so bump the cost of these slightly. */
10923 *cost
+= extra_cost
->alu
.clz
;
10927 if (XEXP (x
, 1) == const0_rtx
)
10930 *cost
+= extra_cost
->alu
.log_shift
;
10931 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10934 /* Fall through. */
10938 *cost
+= COSTS_N_INSNS (1);
10942 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10943 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10944 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10945 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10946 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10947 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10948 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10949 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10953 *cost
+= extra_cost
->mult
[1].extend
;
10954 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10955 ZERO_EXTEND
, 0, speed_p
)
10956 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10957 ZERO_EXTEND
, 0, speed_p
));
10960 *cost
= LIBCALL_COST (1);
10963 case UNSPEC_VOLATILE
:
10965 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10968 /* Reading the PC is like reading any other register. Writing it
10969 is more expensive, but we take that into account elsewhere. */
10974 /* TODO: Simple zero_extract of bottom bits using AND. */
10975 /* Fall through. */
10979 && CONST_INT_P (XEXP (x
, 1))
10980 && CONST_INT_P (XEXP (x
, 2)))
10983 *cost
+= extra_cost
->alu
.bfx
;
10984 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10987 /* Without UBFX/SBFX, need to resort to shift operations. */
10988 *cost
+= COSTS_N_INSNS (1);
10990 *cost
+= 2 * extra_cost
->alu
.shift
;
10991 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10995 if (TARGET_HARD_FLOAT
)
10998 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10999 if (!TARGET_FPU_ARMV8
11000 && GET_MODE (XEXP (x
, 0)) == HFmode
)
11002 /* Pre v8, widening HF->DF is a two-step process, first
11003 widening to SFmode. */
11004 *cost
+= COSTS_N_INSNS (1);
11006 *cost
+= extra_cost
->fp
[0].widen
;
11008 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11012 *cost
= LIBCALL_COST (1);
11015 case FLOAT_TRUNCATE
:
11016 if (TARGET_HARD_FLOAT
)
11019 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
11020 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11022 /* Vector modes? */
11024 *cost
= LIBCALL_COST (1);
11028 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
11030 rtx op0
= XEXP (x
, 0);
11031 rtx op1
= XEXP (x
, 1);
11032 rtx op2
= XEXP (x
, 2);
11035 /* vfms or vfnma. */
11036 if (GET_CODE (op0
) == NEG
)
11037 op0
= XEXP (op0
, 0);
11039 /* vfnms or vfnma. */
11040 if (GET_CODE (op2
) == NEG
)
11041 op2
= XEXP (op2
, 0);
11043 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
11044 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
11045 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
11048 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
11053 *cost
= LIBCALL_COST (3);
11058 if (TARGET_HARD_FLOAT
)
11060 if (GET_MODE_CLASS (mode
) == MODE_INT
)
11062 mode
= GET_MODE (XEXP (x
, 0));
11064 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
11065 /* Strip of the 'cost' of rounding towards zero. */
11066 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11067 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
11070 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11071 /* ??? Increase the cost to deal with transferring from
11072 FP -> CORE registers? */
11075 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11076 && TARGET_FPU_ARMV8
)
11079 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
11082 /* Vector costs? */
11084 *cost
= LIBCALL_COST (1);
11088 case UNSIGNED_FLOAT
:
11089 if (TARGET_HARD_FLOAT
)
11091 /* ??? Increase the cost to deal with transferring from CORE
11092 -> FP registers? */
11094 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
11097 *cost
= LIBCALL_COST (1);
11105 /* Just a guess. Guess number of instructions in the asm
11106 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11107 though (see PR60663). */
11108 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
11109 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
11111 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
11115 if (mode
!= VOIDmode
)
11116 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
11118 *cost
= COSTS_N_INSNS (4); /* Who knows? */
11123 #undef HANDLE_NARROW_SHIFT_ARITH
11125 /* RTX costs when optimizing for size. */
11127 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
11128 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
11131 int code
= GET_CODE (x
);
11133 if (TARGET_OLD_RTX_COSTS
11134 || (!current_tune
->insn_extra_cost
&& !TARGET_NEW_GENERIC_COSTS
))
11136 /* Old way. (Deprecated.) */
11138 result
= arm_size_rtx_costs (x
, (enum rtx_code
) code
,
11139 (enum rtx_code
) outer_code
, total
);
11141 result
= current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
11142 (enum rtx_code
) outer_code
, total
,
11148 if (current_tune
->insn_extra_cost
)
11149 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11150 (enum rtx_code
) outer_code
,
11151 current_tune
->insn_extra_cost
,
11153 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11154 && current_tune->insn_extra_cost != NULL */
11156 result
= arm_new_rtx_costs (x
, (enum rtx_code
) code
,
11157 (enum rtx_code
) outer_code
,
11158 &generic_extra_costs
, total
, speed
);
11161 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
11163 print_rtl_single (dump_file
, x
);
11164 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
11165 *total
, result
? "final" : "partial");
11170 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11171 supported on any "slowmul" cores, so it can be ignored. */
11174 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11175 int *total
, bool speed
)
11177 machine_mode mode
= GET_MODE (x
);
11181 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11188 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11191 *total
= COSTS_N_INSNS (20);
11195 if (CONST_INT_P (XEXP (x
, 1)))
11197 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11198 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11199 int cost
, const_ok
= const_ok_for_arm (i
);
11200 int j
, booth_unit_size
;
11202 /* Tune as appropriate. */
11203 cost
= const_ok
? 4 : 8;
11204 booth_unit_size
= 2;
11205 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11207 i
>>= booth_unit_size
;
11211 *total
= COSTS_N_INSNS (cost
);
11212 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
11216 *total
= COSTS_N_INSNS (20);
11220 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
11225 /* RTX cost for cores with a fast multiply unit (M variants). */
11228 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11229 int *total
, bool speed
)
11231 machine_mode mode
= GET_MODE (x
);
11235 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11239 /* ??? should thumb2 use different costs? */
11243 /* There is no point basing this on the tuning, since it is always the
11244 fast variant if it exists at all. */
11246 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11247 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11248 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11250 *total
= COSTS_N_INSNS(2);
11255 if (mode
== DImode
)
11257 *total
= COSTS_N_INSNS (5);
11261 if (CONST_INT_P (XEXP (x
, 1)))
11263 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
11264 & (unsigned HOST_WIDE_INT
) 0xffffffff);
11265 int cost
, const_ok
= const_ok_for_arm (i
);
11266 int j
, booth_unit_size
;
11268 /* Tune as appropriate. */
11269 cost
= const_ok
? 4 : 8;
11270 booth_unit_size
= 8;
11271 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
11273 i
>>= booth_unit_size
;
11277 *total
= COSTS_N_INSNS(cost
);
11281 if (mode
== SImode
)
11283 *total
= COSTS_N_INSNS (4);
11287 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11289 if (TARGET_HARD_FLOAT
11291 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11293 *total
= COSTS_N_INSNS (1);
11298 /* Requires a lib call */
11299 *total
= COSTS_N_INSNS (20);
11303 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11308 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11309 so it can be ignored. */
11312 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11313 int *total
, bool speed
)
11315 machine_mode mode
= GET_MODE (x
);
11319 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11326 if (GET_CODE (XEXP (x
, 0)) != MULT
)
11327 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11329 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11330 will stall until the multiplication is complete. */
11331 *total
= COSTS_N_INSNS (3);
11335 /* There is no point basing this on the tuning, since it is always the
11336 fast variant if it exists at all. */
11338 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11339 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11340 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11342 *total
= COSTS_N_INSNS (2);
11347 if (mode
== DImode
)
11349 *total
= COSTS_N_INSNS (5);
11353 if (CONST_INT_P (XEXP (x
, 1)))
11355 /* If operand 1 is a constant we can more accurately
11356 calculate the cost of the multiply. The multiplier can
11357 retire 15 bits on the first cycle and a further 12 on the
11358 second. We do, of course, have to load the constant into
11359 a register first. */
11360 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
11361 /* There's a general overhead of one cycle. */
11363 unsigned HOST_WIDE_INT masked_const
;
11365 if (i
& 0x80000000)
11368 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
11370 masked_const
= i
& 0xffff8000;
11371 if (masked_const
!= 0)
11374 masked_const
= i
& 0xf8000000;
11375 if (masked_const
!= 0)
11378 *total
= COSTS_N_INSNS (cost
);
11382 if (mode
== SImode
)
11384 *total
= COSTS_N_INSNS (3);
11388 /* Requires a lib call */
11389 *total
= COSTS_N_INSNS (20);
11393 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11398 /* RTX costs for 9e (and later) cores. */
11401 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
11402 int *total
, bool speed
)
11404 machine_mode mode
= GET_MODE (x
);
11411 /* Small multiply: 32 cycles for an integer multiply inst. */
11412 if (arm_arch6m
&& arm_m_profile_small_mul
)
11413 *total
= COSTS_N_INSNS (32);
11415 *total
= COSTS_N_INSNS (3);
11419 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
11427 /* There is no point basing this on the tuning, since it is always the
11428 fast variant if it exists at all. */
11430 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
11431 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11432 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
11434 *total
= COSTS_N_INSNS (2);
11439 if (mode
== DImode
)
11441 *total
= COSTS_N_INSNS (5);
11445 if (mode
== SImode
)
11447 *total
= COSTS_N_INSNS (2);
11451 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11453 if (TARGET_HARD_FLOAT
11455 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
11457 *total
= COSTS_N_INSNS (1);
11462 *total
= COSTS_N_INSNS (20);
11466 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
11469 /* All address computations that can be done are free, but rtx cost returns
11470 the same for practically all of them. So we weight the different types
11471 of address here in the order (most pref first):
11472 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
/* Address-cost helper for ARM (32-bit) state; dispatched to from
   arm_address_cost below.  NOTE(review): this extract has elided the
   braces and the per-case return values, so only the classification
   tests are visible -- confirm the actual weights against the full
   source before relying on them.  */
11474 arm_arm_address_cost (rtx x
)
11476 enum rtx_code c
= GET_CODE (x
);
/* Auto-increment/decrement addressing is tested first (the comment
   above says it is the most preferred form).  */
11478 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
/* Bare MEM / LABEL_REF / SYMBOL_REF addresses are tested next.  */
11480 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
/* Remaining tests distinguish reg+const sums from sums involving an
   arithmetic sub-expression (e.g. a shifted index register).  */
11485 if (CONST_INT_P (XEXP (x
, 1)))
11488 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
/* Address-cost helper for Thumb state; dispatched to from
   arm_address_cost below.  NOTE(review): interior lines (braces and
   return values) are elided in this extract; only the reg+const-int
   operand test is visible.  */
11498 arm_thumb_address_cost (rtx x
)
11500 enum rtx_code c
= GET_CODE (x
)
;
/* Visible test: a sum whose first operand is a register and whose
   second is a constant integer (the common reg+offset form).  */
11505 && REG_P (XEXP (x
, 0))
11506 && CONST_INT_P (XEXP (x
, 1)))
/* Implement TARGET_ADDRESS_COST.  Dispatches to the ARM- or
   Thumb-state helper depending on TARGET_32BIT; the mode, address
   space and speed arguments are ignored.  */
11513 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11514 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11516 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11519 /* Adjust cost hook for XScale. */
11521 xscale_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11523 /* Some true dependencies can have a higher cost depending
11524 on precisely how certain input operands are used. */
11525 if (REG_NOTE_KIND(link
) == 0
11526 && recog_memoized (insn
) >= 0
11527 && recog_memoized (dep
) >= 0)
11529 int shift_opnum
= get_attr_shift (insn
);
11530 enum attr_type attr_type
= get_attr_type (dep
);
11532 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11533 operand for INSN. If we have a shifted input operand and the
11534 instruction we depend on is another ALU instruction, then we may
11535 have to account for an additional stall. */
11536 if (shift_opnum
!= 0
11537 && (attr_type
== TYPE_ALU_SHIFT_IMM
11538 || attr_type
== TYPE_ALUS_SHIFT_IMM
11539 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11540 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11541 || attr_type
== TYPE_ALU_SHIFT_REG
11542 || attr_type
== TYPE_ALUS_SHIFT_REG
11543 || attr_type
== TYPE_LOGIC_SHIFT_REG
11544 || attr_type
== TYPE_LOGICS_SHIFT_REG
11545 || attr_type
== TYPE_MOV_SHIFT
11546 || attr_type
== TYPE_MVN_SHIFT
11547 || attr_type
== TYPE_MOV_SHIFT_REG
11548 || attr_type
== TYPE_MVN_SHIFT_REG
))
11550 rtx shifted_operand
;
11553 /* Get the shifted operand. */
11554 extract_insn (insn
);
11555 shifted_operand
= recog_data
.operand
[shift_opnum
];
11557 /* Iterate over all the operands in DEP. If we write an operand
11558 that overlaps with SHIFTED_OPERAND, then we have increase the
11559 cost of this dependency. */
11560 extract_insn (dep
);
11561 preprocess_constraints (dep
);
11562 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11564 /* We can ignore strict inputs. */
11565 if (recog_data
.operand_type
[opno
] == OP_IN
)
11568 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11580 /* Adjust cost hook for Cortex A9. */
11582 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11584 switch (REG_NOTE_KIND (link
))
11591 case REG_DEP_OUTPUT
:
11592 if (recog_memoized (insn
) >= 0
11593 && recog_memoized (dep
) >= 0)
11595 if (GET_CODE (PATTERN (insn
)) == SET
)
11598 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11600 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11602 enum attr_type attr_type_insn
= get_attr_type (insn
);
11603 enum attr_type attr_type_dep
= get_attr_type (dep
);
11605 /* By default all dependencies of the form
11608 have an extra latency of 1 cycle because
11609 of the input and output dependency in this
11610 case. However this gets modeled as an true
11611 dependency and hence all these checks. */
11612 if (REG_P (SET_DEST (PATTERN (insn
)))
11613 && REG_P (SET_DEST (PATTERN (dep
)))
11614 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
11615 SET_DEST (PATTERN (dep
))))
11617 /* FMACS is a special case where the dependent
11618 instruction can be issued 3 cycles before
11619 the normal latency in case of an output
11621 if ((attr_type_insn
== TYPE_FMACS
11622 || attr_type_insn
== TYPE_FMACD
)
11623 && (attr_type_dep
== TYPE_FMACS
11624 || attr_type_dep
== TYPE_FMACD
))
11626 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11627 *cost
= insn_default_latency (dep
) - 3;
11629 *cost
= insn_default_latency (dep
);
11634 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
11635 *cost
= insn_default_latency (dep
) + 1;
11637 *cost
= insn_default_latency (dep
);
11647 gcc_unreachable ();
11653 /* Adjust cost hook for FA726TE. */
11655 fa726te_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int * cost
)
11657 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11658 have penalty of 3. */
11659 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
11660 && recog_memoized (insn
) >= 0
11661 && recog_memoized (dep
) >= 0
11662 && get_attr_conds (dep
) == CONDS_SET
)
11664 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11665 if (get_attr_conds (insn
) == CONDS_USE
11666 && get_attr_type (insn
) != TYPE_BRANCH
)
11672 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11673 || get_attr_conds (insn
) == CONDS_USE
)
11683 /* Implement TARGET_REGISTER_MOVE_COST.
11685 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11686 it is typically more expensive than a single memory access. We set
11687 the cost to less than two memory accesses so that floating
11688 point to integer conversion does not go through memory. */
/* NOTE(review): the per-branch return values are elided in this
   extract; only the register-class classification is visible.
   Confirm the concrete costs against the full source.  */
11691 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11692 reg_class_t from
, reg_class_t to
)
/* Cross moves between the VFP bank and core registers.  */
11696 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11697 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
/* Cross moves involving the iWMMXt register file.  */
11699 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11700 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11702 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
/* Moves touching the Thumb high registers.  */
11709 if (from
== HI_REGS
|| to
== HI_REGS
)
11716 /* Implement TARGET_MEMORY_MOVE_COST. */
/* For sub-word modes and for LO_REGS vs. other classes the cost
   scales with 2 * mode size, doubled for non-LO_REGS classes.
   NOTE(review): the branch taken before the visible return (and the
   value returned for the GET_MODE_SIZE < 4 case) is elided in this
   extract -- verify against the full source.  */
11719 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11720 bool in ATTRIBUTE_UNUSED
)
11726 if (GET_MODE_SIZE (mode
) < 4)
11729 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11733 /* Vectorizer cost model implementation. */
11735 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11737 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11739 int misalign ATTRIBUTE_UNUSED
)
11743 switch (type_of_cost
)
11746 return current_tune
->vec_costs
->scalar_stmt_cost
;
11749 return current_tune
->vec_costs
->scalar_load_cost
;
11752 return current_tune
->vec_costs
->scalar_store_cost
;
11755 return current_tune
->vec_costs
->vec_stmt_cost
;
11758 return current_tune
->vec_costs
->vec_align_load_cost
;
11761 return current_tune
->vec_costs
->vec_store_cost
;
11763 case vec_to_scalar
:
11764 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11766 case scalar_to_vec
:
11767 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11769 case unaligned_load
:
11770 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11772 case unaligned_store
:
11773 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11775 case cond_branch_taken
:
11776 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11778 case cond_branch_not_taken
:
11779 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11782 case vec_promote_demote
:
11783 return current_tune
->vec_costs
->vec_stmt_cost
;
11785 case vec_construct
:
11786 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11787 return elements
/ 2 + 1;
11790 gcc_unreachable ();
11794 /* Implement targetm.vectorize.add_stmt_cost. */
/* DATA is the vectorizer's cost accumulator array (indexed by
   vect_cost_model_location); COUNT copies of statement kind KIND are
   costed via arm_builtin_vectorization_cost and added to the WHERE
   bucket.  NOTE(review): the function's return statement is elided in
   this extract; RETVAL is presumably returned -- confirm.  */
11797 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11798 struct _stmt_vec_info
*stmt_info
, int misalign
,
11799 enum vect_cost_model_location where
)
11801 unsigned *cost
= (unsigned *) data
;
11802 unsigned retval
= 0;
/* Only do any work when the vectorizer cost model is enabled.  */
11804 if (flag_vect_cost_model
)
11806 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11807 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11809 /* Statements in an inner loop relative to the loop being
11810 vectorized are weighted more heavily. The value here is
11811 arbitrary and could potentially be improved with analysis. */
11812 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11813 count
*= 50; /* FIXME. */
11815 retval
= (unsigned) (count
* stmt_cost
);
11816 cost
[where
] += retval
;
11822 /* Return true if and only if this insn can dual-issue only as older. */
11824 cortexa7_older_only (rtx_insn
*insn
)
11826 if (recog_memoized (insn
) < 0)
11829 switch (get_attr_type (insn
))
11831 case TYPE_ALU_DSP_REG
:
11832 case TYPE_ALU_SREG
:
11833 case TYPE_ALUS_SREG
:
11834 case TYPE_LOGIC_REG
:
11835 case TYPE_LOGICS_REG
:
11837 case TYPE_ADCS_REG
:
11842 case TYPE_SHIFT_IMM
:
11843 case TYPE_SHIFT_REG
:
11844 case TYPE_LOAD_BYTE
:
11847 case TYPE_FFARITHS
:
11849 case TYPE_FFARITHD
:
11867 case TYPE_F_STORES
:
11874 /* Return true if and only if this insn can dual-issue as younger. */
11876 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11878 if (recog_memoized (insn
) < 0)
11881 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11885 switch (get_attr_type (insn
))
11888 case TYPE_ALUS_IMM
:
11889 case TYPE_LOGIC_IMM
:
11890 case TYPE_LOGICS_IMM
:
11895 case TYPE_MOV_SHIFT
:
11896 case TYPE_MOV_SHIFT_REG
:
11906 /* Look for an instruction that can dual issue only as an older
11907 instruction, and move it in front of any instructions that can
11908 dual-issue as younger, while preserving the relative order of all
11909 other instructions in the ready list. This is a heuristic to help
11910 dual-issue in later cycles, by postponing issue of more flexible
11911 instructions. This heuristic may affect dual issue opportunities
11912 in the current cycle. */
11914 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11915 int *n_readyp
, int clock
)
11918 int first_older_only
= -1, first_younger
= -1;
11922 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11926 /* Traverse the ready list from the head (the instruction to issue
11927 first), and looking for the first instruction that can issue as
11928 younger and the first instruction that can dual-issue only as
11930 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11932 rtx_insn
*insn
= ready
[i
];
11933 if (cortexa7_older_only (insn
))
11935 first_older_only
= i
;
11937 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11940 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11944 /* Nothing to reorder because either no younger insn found or insn
11945 that can dual-issue only as older appears before any insn that
11946 can dual-issue as younger. */
11947 if (first_younger
== -1)
11950 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11954 /* Nothing to reorder because no older-only insn in the ready list. */
11955 if (first_older_only
== -1)
11958 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11962 /* Move first_older_only insn before first_younger. */
11964 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11965 INSN_UID(ready
[first_older_only
]),
11966 INSN_UID(ready
[first_younger
]));
11967 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11968 for (i
= first_older_only
; i
< first_younger
; i
++)
11970 ready
[i
] = ready
[i
+1];
11973 ready
[i
] = first_older_only_insn
;
11977 /* Implement TARGET_SCHED_REORDER. */
11979 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11985 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11988 /* Do nothing for other cores. */
11992 return arm_issue_rate ();
11995 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11996 It corrects the value of COST based on the relationship between
11997 INSN and DEP through the dependence LINK. It returns the new
11998 value. There is a per-core adjust_cost hook to adjust scheduler costs
11999 and the per-core hook can choose to completely override the generic
12000 adjust_cost function. Only put bits of code into arm_adjust_cost that
12001 are common across all cores. */
12003 arm_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep
, int cost
)
12007 /* When generating Thumb-1 code, we want to place flag-setting operations
12008 close to a conditional branch which depends on them, so that we can
12009 omit the comparison. */
12011 && REG_NOTE_KIND (link
) == 0
12012 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
12013 && recog_memoized (dep
) >= 0
12014 && get_attr_conds (dep
) == CONDS_SET
)
12017 if (current_tune
->sched_adjust_cost
!= NULL
)
12019 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
12023 /* XXX Is this strictly true? */
12024 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
12025 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
12028 /* Call insns don't incur a stall, even if they follow a load. */
12029 if (REG_NOTE_KIND (link
) == 0
12033 if ((i_pat
= single_set (insn
)) != NULL
12034 && MEM_P (SET_SRC (i_pat
))
12035 && (d_pat
= single_set (dep
)) != NULL
12036 && MEM_P (SET_DEST (d_pat
)))
12038 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
12039 /* This is a load after a store, there is no conflict if the load reads
12040 from a cached area. Assume that loads from the stack, and from the
12041 constant pool are cached, and that others will miss. This is a
12044 if ((GET_CODE (src_mem
) == SYMBOL_REF
12045 && CONSTANT_POOL_ADDRESS_P (src_mem
))
12046 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
12047 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
12048 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
/* Implement TARGET_MAX_CONDITIONAL_EXECUTE: the maximum number of
   insns the ifcvt pass may conditionalize, taken from the
   tuning-derived global max_insns_skipped.  */
12056 arm_max_conditional_execute (void)
12058 return max_insns_skipped
;
/* Default BRANCH_COST implementation.  Visible logic: Thumb-2 when
   optimizing for size costs 1, otherwise 4; a second (elided) path
   returns 2 when optimizing, else 0.  NOTE(review): the condition
   selecting between the two returns is missing from this extract --
   confirm against the full source.  */
12062 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12065 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12067 return (optimize
> 0) ? 2 : 0;
/* BRANCH_COST for Cortex-A5: branches are free when optimizing for
   speed; fall back to the default cost for size.  */
12071 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12073 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
)
;
12076 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12077 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12078 sequences of non-executed instructions in IT blocks probably take the same
12079 amount of time as executed instructions (and the IT instruction itself takes
12080 space in icache). This function was experimentally determined to give good
12081 results on a popular embedded benchmark. */
12084 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
12086 return (TARGET_32BIT
&& speed_p
) ? 1
12087 : arm_default_branch_cost (speed_p
, predictable_p
);
/* BRANCH_COST for Cortex-M7: branches are free when optimizing for
   speed; fall back to the default cost for size.  */
12091 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
12093 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
/* Lazy-initialized cache of FP constants used by
   arm_const_double_rtx below.  value_fp0 presumably ends up holding
   the REAL_VALUE for 0.0 -- the assignment line is elided in this
   extract, so confirm against the full source.  */
12096 static bool fp_consts_inited
= false;
12098 static REAL_VALUE_TYPE value_fp0
;
/* One-time setup: parse "0" in DFmode and record that the table is
   ready.  */
12101 init_fp_table (void)
12105 r
= REAL_VALUE_ATOF ("0", DFmode
);
12107 fp_consts_inited
= true;
12110 /* Return TRUE if rtx X is a valid immediate FP constant. */
/* Visible logic: ensure the lazy FP table is initialized, reject
   minus zero, then compare against the cached 0.0 value.
   NOTE(review): the return statements and any further comparisons are
   elided in this extract.  */
12112 arm_const_double_rtx (rtx x
)
12114 const REAL_VALUE_TYPE
*r
;
12116 if (!fp_consts_inited
)
12119 r
= CONST_DOUBLE_REAL_VALUE (x
);
/* -0.0 is never a valid immediate here.  */
12120 if (REAL_VALUE_MINUS_ZERO (*r
))
12123 if (real_equal (r
, &value_fp0
))
12129 /* VFPv3 has a fairly wide range of representable immediates, formed from
12130 "quarter-precision" floating-point values. These can be evaluated using this
12131 formula (with ^ for exponentiation):
12135 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12136 16 <= n <= 31 and 0 <= r <= 7.
12138 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12140 - A (most-significant) is the sign bit.
12141 - BCD are the exponent (encoded as r XOR 3).
12142 - EFGH are the mantissa (encoded as n - 16).
12145 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12146 fconst[sd] instruction, or -1 if X isn't suitable. */
12148 vfp3_const_double_index (rtx x
)
12150 REAL_VALUE_TYPE r
, m
;
12151 int sign
, exponent
;
12152 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12153 unsigned HOST_WIDE_INT mask
;
12154 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12157 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12160 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12162 /* We can't represent these things, so detect them first. */
12163 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12166 /* Extract sign, exponent and mantissa. */
12167 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12168 r
= real_value_abs (&r
);
12169 exponent
= REAL_EXP (&r
);
12170 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12171 highest (sign) bit, with a fixed binary point at bit point_pos.
12172 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12173 bits for the mantissa, this may fail (low bits would be lost). */
12174 real_ldexp (&m
, &r
, point_pos
- exponent
);
12175 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12176 mantissa
= w
.elt (0);
12177 mant_hi
= w
.elt (1);
12179 /* If there are bits set in the low part of the mantissa, we can't
12180 represent this value. */
12184 /* Now make it so that mantissa contains the most-significant bits, and move
12185 the point_pos to indicate that the least-significant bits have been
12187 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12188 mantissa
= mant_hi
;
12190 /* We can permit four significant bits of mantissa only, plus a high bit
12191 which is always 1. */
12192 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
12193 if ((mantissa
& mask
) != 0)
12196 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12197 mantissa
>>= point_pos
- 5;
12199 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12200 floating-point immediate zero with Neon using an integer-zero load, but
12201 that case is handled elsewhere.) */
12205 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12207 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12208 normalized significands are in the range [1, 2). (Our mantissa is shifted
12209 left 4 places at this point relative to normalized IEEE754 values). GCC
12210 internally uses [0.5, 1) (see real.c), so the exponent returned from
12211 REAL_EXP must be altered. */
12212 exponent
= 5 - exponent
;
12214 if (exponent
< 0 || exponent
> 7)
12217 /* Sign, mantissa and exponent are now in the correct form to plug into the
12218 formula described in the comment above. */
12219 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12222 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
/* Thin predicate wrapper: X is a valid fconst[sd] immediate exactly
   when vfp3_const_double_index finds an encoding for it.  */
12224 vfp3_const_double_rtx (rtx x
)
12229 return vfp3_const_double_index (x
) != -1;
12232 /* Recognize immediates which can be used in various Neon instructions. Legal
12233 immediates are described by the following table (for VMVN variants, the
12234 bitwise inverse of the constant shown is recognized. In either case, VMOV
12235 is output and the correct instruction to use for a given constant is chosen
12236 by the assembler). The constant shown is replicated across all elements of
12237 the destination vector.
12239 insn elems variant constant (binary)
12240 ---- ----- ------- -----------------
12241 vmov i32 0 00000000 00000000 00000000 abcdefgh
12242 vmov i32 1 00000000 00000000 abcdefgh 00000000
12243 vmov i32 2 00000000 abcdefgh 00000000 00000000
12244 vmov i32 3 abcdefgh 00000000 00000000 00000000
12245 vmov i16 4 00000000 abcdefgh
12246 vmov i16 5 abcdefgh 00000000
12247 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12248 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12249 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12250 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12251 vmvn i16 10 00000000 abcdefgh
12252 vmvn i16 11 abcdefgh 00000000
12253 vmov i32 12 00000000 00000000 abcdefgh 11111111
12254 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12255 vmov i32 14 00000000 abcdefgh 11111111 11111111
12256 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12257 vmov i8 16 abcdefgh
12258 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12259 eeeeeeee ffffffff gggggggg hhhhhhhh
12260 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12261 vmov f32 19 00000000 00000000 00000000 00000000
12263 For case 18, B = !b. Representable values are exactly those accepted by
12264 vfp3_const_double_index, but are output as floating-point numbers rather
12267 For case 19, we will change it to vmov.i32 when assembling.
12269 Variants 0-5 (inclusive) may also be used as immediates for the second
12270 operand of VORR/VBIC instructions.
12272 The INVERSE argument causes the bitwise inverse of the given operand to be
12273 recognized instead (used for recognizing legal immediates for the VAND/VORN
12274 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12275 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12276 output, rather than the real insns vbic/vorr).
12278 INVERSE makes no difference to the recognition of float vectors.
12280 The return value is the variant of immediate as shown in the above table, or
12281 -1 if the given value doesn't match any of the listed patterns.
12284 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12285 rtx
*modconst
, int *elementwidth
)
12287 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12289 for (i = 0; i < idx; i += (STRIDE)) \
12294 immtype = (CLASS); \
12295 elsize = (ELSIZE); \
12299 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12300 unsigned int innersize
;
12301 unsigned char bytes
[16];
12302 int immtype
= -1, matches
;
12303 unsigned int invmask
= inverse
? 0xff : 0;
12304 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12307 n_elts
= CONST_VECTOR_NUNITS (op
);
12311 if (mode
== VOIDmode
)
12315 innersize
= GET_MODE_UNIT_SIZE (mode
);
12317 /* Vectors of float constants. */
12318 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12320 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12321 const REAL_VALUE_TYPE
*r0
;
12323 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12326 r0
= CONST_DOUBLE_REAL_VALUE (el0
);
12328 for (i
= 1; i
< n_elts
; i
++)
12330 rtx elt
= CONST_VECTOR_ELT (op
, i
);
12331 if (!real_equal (r0
, CONST_DOUBLE_REAL_VALUE (elt
)))
12336 *modconst
= CONST_VECTOR_ELT (op
, 0);
12341 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12347 /* Splat vector constant out into a byte vector. */
12348 for (i
= 0; i
< n_elts
; i
++)
12350 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12351 unsigned HOST_WIDE_INT elpart
;
12352 unsigned int part
, parts
;
12354 if (CONST_INT_P (el
))
12356 elpart
= INTVAL (el
);
12359 else if (CONST_DOUBLE_P (el
))
12361 elpart
= CONST_DOUBLE_LOW (el
);
12365 gcc_unreachable ();
12367 for (part
= 0; part
< parts
; part
++)
12370 for (byte
= 0; byte
< innersize
; byte
++)
12372 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12373 elpart
>>= BITS_PER_UNIT
;
12375 if (CONST_DOUBLE_P (el
))
12376 elpart
= CONST_DOUBLE_HIGH (el
);
12380 /* Sanity check. */
12381 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12385 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12386 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12388 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12389 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12391 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12392 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12394 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12395 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12397 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12399 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12401 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12402 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12404 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12405 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12407 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12408 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12410 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12411 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
12413 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
12415 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
12417 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12418 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12420 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12421 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12423 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12424 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12426 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12427 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
12429 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
12431 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
12432 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
12440 *elementwidth
= elsize
;
12444 unsigned HOST_WIDE_INT imm
= 0;
12446 /* Un-invert bytes of recognized vector, if necessary. */
12448 for (i
= 0; i
< idx
; i
++)
12449 bytes
[i
] ^= invmask
;
12453 /* FIXME: Broken on 32-bit H_W_I hosts. */
12454 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
12456 for (i
= 0; i
< 8; i
++)
12457 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
12458 << (i
* BITS_PER_UNIT
);
12460 *modconst
= GEN_INT (imm
);
12464 unsigned HOST_WIDE_INT imm
= 0;
12466 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
12467 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
12469 *modconst
= GEN_INT (imm
);
12477 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12478 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12479 float elements), and a modified constant (whatever should be output for a
12480 VMOV) in *MODCONST. */
12483 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
12484 rtx
*modconst
, int *elementwidth
)
12488 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
12494 *modconst
= tmpconst
;
12497 *elementwidth
= tmpwidth
;
12502 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12503 the immediate is valid, write a constant suitable for using as an operand
12504 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12505 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12508 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
12509 rtx
*modconst
, int *elementwidth
)
12513 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
12515 if (retval
< 0 || retval
> 5)
12519 *modconst
= tmpconst
;
12522 *elementwidth
= tmpwidth
;
12527 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12528 the immediate is valid, write a constant suitable for using as an operand
12529 to VSHR/VSHL to *MODCONST and the corresponding element width to
12530 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12531 because they have different limitations. */
12534 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12535 rtx
*modconst
, int *elementwidth
,
12538 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
12539 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12540 unsigned HOST_WIDE_INT last_elt
= 0;
12541 unsigned HOST_WIDE_INT maxshift
;
12543 /* Split vector constant out into a byte vector. */
12544 for (i
= 0; i
< n_elts
; i
++)
12546 rtx el
= CONST_VECTOR_ELT (op
, i
);
12547 unsigned HOST_WIDE_INT elpart
;
12549 if (CONST_INT_P (el
))
12550 elpart
= INTVAL (el
);
12551 else if (CONST_DOUBLE_P (el
))
12554 gcc_unreachable ();
12556 if (i
!= 0 && elpart
!= last_elt
)
12562 /* Shift less than element size. */
12563 maxshift
= innersize
* 8;
12567 /* Left shift immediate value can be from 0 to <size>-1. */
12568 if (last_elt
>= maxshift
)
12573 /* Right shift immediate value can be from 1 to <size>. */
12574 if (last_elt
== 0 || last_elt
> maxshift
)
12579 *elementwidth
= innersize
* 8;
12582 *modconst
= CONST_VECTOR_ELT (op
, 0);
12587 /* Return a string suitable for output of Neon immediate logic operation
12591 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12592 int inverse
, int quad
)
12594 int width
, is_valid
;
12595 static char templ
[40];
12597 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12599 gcc_assert (is_valid
!= 0);
12602 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12604 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12609 /* Return a string suitable for output of Neon immediate shift operation
12610 (VSHR or VSHL) MNEM. */
12613 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12614 machine_mode mode
, int quad
,
12617 int width
, is_valid
;
12618 static char templ
[40];
12620 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12621 gcc_assert (is_valid
!= 0);
12624 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12626 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12631 /* Output a sequence of pairwise operations to implement a reduction.
12632 NOTE: We do "too much work" here, because pairwise operations work on two
12633 registers-worth of operands in one go. Unfortunately we can't exploit those
12634 extra calculations to do the full operation in fewer steps, I don't think.
12635 Although all vector elements of the result but the first are ignored, we
12636 actually calculate the same result in each of the elements. An alternative
12637 such as initially loading a vector with zero to use as each of the second
12638 operands would use up an additional register and take an extra instruction,
12639 for no particular gain. */
12642 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12643 rtx (*reduc
) (rtx
, rtx
, rtx
))
12645 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12648 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12650 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12651 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12656 /* If VALS is a vector constant that can be loaded into a register
12657 using VDUP, generate instructions to do so and return an RTX to
12658 assign to the register. Otherwise return NULL_RTX. */
12661 neon_vdup_constant (rtx vals
)
12663 machine_mode mode
= GET_MODE (vals
);
12664 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12667 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12670 if (!const_vec_duplicate_p (vals
, &x
))
12671 /* The elements are not all the same. We could handle repeating
12672 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12673 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12677 /* We can load this constant by using VDUP and a constant in a
12678 single ARM register. This will be cheaper than a vector
12681 x
= copy_to_mode_reg (inner_mode
, x
);
12682 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12685 /* Generate code to load VALS, which is a PARALLEL containing only
12686 constants (for vec_init) or CONST_VECTOR, efficiently into a
12687 register. Returns an RTX to copy into the register, or NULL_RTX
12688 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12691 neon_make_constant (rtx vals
)
12693 machine_mode mode
= GET_MODE (vals
);
12695 rtx const_vec
= NULL_RTX
;
12696 int n_elts
= GET_MODE_NUNITS (mode
);
12700 if (GET_CODE (vals
) == CONST_VECTOR
)
12702 else if (GET_CODE (vals
) == PARALLEL
)
12704 /* A CONST_VECTOR must contain only CONST_INTs and
12705 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12706 Only store valid constants in a CONST_VECTOR. */
12707 for (i
= 0; i
< n_elts
; ++i
)
12709 rtx x
= XVECEXP (vals
, 0, i
);
12710 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12713 if (n_const
== n_elts
)
12714 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12717 gcc_unreachable ();
12719 if (const_vec
!= NULL
12720 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12721 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12723 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12724 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12725 pipeline cycle; creating the constant takes one or two ARM
12726 pipeline cycles. */
12728 else if (const_vec
!= NULL_RTX
)
12729 /* Load from constant pool. On Cortex-A8 this takes two cycles
12730 (for either double or quad vectors). We can not take advantage
12731 of single-cycle VLD1 because we need a PC-relative addressing
12735 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12736 We can not construct an initializer. */
12740 /* Initialize vector TARGET to VALS. */
12743 neon_expand_vector_init (rtx target
, rtx vals
)
12745 machine_mode mode
= GET_MODE (target
);
12746 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12747 int n_elts
= GET_MODE_NUNITS (mode
);
12748 int n_var
= 0, one_var
= -1;
12749 bool all_same
= true;
12753 for (i
= 0; i
< n_elts
; ++i
)
12755 x
= XVECEXP (vals
, 0, i
);
12756 if (!CONSTANT_P (x
))
12757 ++n_var
, one_var
= i
;
12759 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12765 rtx constant
= neon_make_constant (vals
);
12766 if (constant
!= NULL_RTX
)
12768 emit_move_insn (target
, constant
);
12773 /* Splat a single non-constant element if we can. */
12774 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12776 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12777 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
12781 /* One field is non-constant. Load constant then overwrite varying
12782 field. This is more efficient than using the stack. */
12785 rtx copy
= copy_rtx (vals
);
12786 rtx index
= GEN_INT (one_var
);
12788 /* Load constant part of vector, substitute neighboring value for
12789 varying element. */
12790 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12791 neon_expand_vector_init (target
, copy
);
12793 /* Insert variable. */
12794 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12798 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12801 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12804 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12807 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12810 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12813 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12816 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12819 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12822 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12825 gcc_unreachable ();
12830 /* Construct the vector in memory one field at a time
12831 and load the whole vector. */
12832 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12833 for (i
= 0; i
< n_elts
; i
++)
12834 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12835 i
* GET_MODE_SIZE (inner_mode
)),
12836 XVECEXP (vals
, 0, i
));
12837 emit_move_insn (target
, mem
);
12840 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12841 ERR if it doesn't. EXP indicates the source location, which includes the
12842 inlining history for intrinsics. */
12845 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12846 const_tree exp
, const char *desc
)
12848 HOST_WIDE_INT lane
;
12850 gcc_assert (CONST_INT_P (operand
));
12852 lane
= INTVAL (operand
);
12854 if (lane
< low
|| lane
>= high
)
12857 error ("%K%s %wd out of range %wd - %wd",
12858 exp
, desc
, lane
, low
, high
- 1);
12860 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12864 /* Bounds-check lanes. */
12867 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12870 bounds_check (operand
, low
, high
, exp
, "lane");
12873 /* Bounds-check constants. */
12876 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12878 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12882 neon_element_bits (machine_mode mode
)
12884 return GET_MODE_UNIT_BITSIZE (mode
);
12888 /* Predicates for `match_operand' and `match_operator'. */
12890 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12891 WB is true if full writeback address modes are allowed and is false
12892 if limited writeback address modes (POST_INC and PRE_DEC) are
12896 arm_coproc_mem_operand (rtx op
, bool wb
)
12900 /* Reject eliminable registers. */
12901 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12902 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12903 || reg_mentioned_p (arg_pointer_rtx
, op
)
12904 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12905 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12906 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12907 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12910 /* Constants are converted into offsets from labels. */
12914 ind
= XEXP (op
, 0);
12916 if (reload_completed
12917 && (GET_CODE (ind
) == LABEL_REF
12918 || (GET_CODE (ind
) == CONST
12919 && GET_CODE (XEXP (ind
, 0)) == PLUS
12920 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12921 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12924 /* Match: (mem (reg)). */
12926 return arm_address_register_rtx_p (ind
, 0);
12928 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12929 acceptable in any case (subject to verification by
12930 arm_address_register_rtx_p). We need WB to be true to accept
12931 PRE_INC and POST_DEC. */
12932 if (GET_CODE (ind
) == POST_INC
12933 || GET_CODE (ind
) == PRE_DEC
12935 && (GET_CODE (ind
) == PRE_INC
12936 || GET_CODE (ind
) == POST_DEC
)))
12937 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12940 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12941 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12942 && GET_CODE (XEXP (ind
, 1)) == PLUS
12943 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12944 ind
= XEXP (ind
, 1);
12949 if (GET_CODE (ind
) == PLUS
12950 && REG_P (XEXP (ind
, 0))
12951 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12952 && CONST_INT_P (XEXP (ind
, 1))
12953 && INTVAL (XEXP (ind
, 1)) > -1024
12954 && INTVAL (XEXP (ind
, 1)) < 1024
12955 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12961 /* Return TRUE if OP is a memory operand which we can load or store a vector
12962 to/from. TYPE is one of the following values:
12963 0 - Vector load/stor (vldr)
12964 1 - Core registers (ldm)
12965 2 - Element/structure loads (vld1)
12968 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12972 /* Reject eliminable registers. */
12973 if (! (reload_in_progress
|| reload_completed
)
12974 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12975 || reg_mentioned_p (arg_pointer_rtx
, op
)
12976 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12977 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12978 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12979 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12982 /* Constants are converted into offsets from labels. */
12986 ind
= XEXP (op
, 0);
12988 if (reload_completed
12989 && (GET_CODE (ind
) == LABEL_REF
12990 || (GET_CODE (ind
) == CONST
12991 && GET_CODE (XEXP (ind
, 0)) == PLUS
12992 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12993 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12996 /* Match: (mem (reg)). */
12998 return arm_address_register_rtx_p (ind
, 0);
13000 /* Allow post-increment with Neon registers. */
13001 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
13002 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
13003 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13005 /* Allow post-increment by register for VLDn */
13006 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
13007 && GET_CODE (XEXP (ind
, 1)) == PLUS
13008 && REG_P (XEXP (XEXP (ind
, 1), 1)))
13015 && GET_CODE (ind
) == PLUS
13016 && REG_P (XEXP (ind
, 0))
13017 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13018 && CONST_INT_P (XEXP (ind
, 1))
13019 && INTVAL (XEXP (ind
, 1)) > -1024
13020 /* For quad modes, we restrict the constant offset to be slightly less
13021 than what the instruction format permits. We have no such constraint
13022 on double mode offsets. (This must match arm_legitimate_index_p.) */
13023 && (INTVAL (XEXP (ind
, 1))
13024 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
13025 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13031 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13034 neon_struct_mem_operand (rtx op
)
13038 /* Reject eliminable registers. */
13039 if (! (reload_in_progress
|| reload_completed
)
13040 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13041 || reg_mentioned_p (arg_pointer_rtx
, op
)
13042 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13043 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13044 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13045 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13048 /* Constants are converted into offsets from labels. */
13052 ind
= XEXP (op
, 0);
13054 if (reload_completed
13055 && (GET_CODE (ind
) == LABEL_REF
13056 || (GET_CODE (ind
) == CONST
13057 && GET_CODE (XEXP (ind
, 0)) == PLUS
13058 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13059 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13062 /* Match: (mem (reg)). */
13064 return arm_address_register_rtx_p (ind
, 0);
13066 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13067 if (GET_CODE (ind
) == POST_INC
13068 || GET_CODE (ind
) == PRE_DEC
)
13069 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13074 /* Return true if X is a register that will be eliminated later on. */
13076 arm_eliminable_register (rtx x
)
13078 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
13079 || REGNO (x
) == ARG_POINTER_REGNUM
13080 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
13081 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
13084 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13085 coprocessor registers. Otherwise return NO_REGS. */
13088 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13090 if (mode
== HFmode
)
13092 if (!TARGET_NEON_FP16
)
13093 return GENERAL_REGS
;
13094 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13096 return GENERAL_REGS
;
13099 /* The neon move patterns handle all legitimate vector and struct
13102 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13103 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13104 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13105 || VALID_NEON_STRUCT_MODE (mode
)))
13108 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13111 return GENERAL_REGS
;
13114 /* Values which must be returned in the most-significant end of the return
13118 arm_return_in_msb (const_tree valtype
)
13120 return (TARGET_AAPCS_BASED
13121 && BYTES_BIG_ENDIAN
13122 && (AGGREGATE_TYPE_P (valtype
)
13123 || TREE_CODE (valtype
) == COMPLEX_TYPE
13124 || FIXED_POINT_TYPE_P (valtype
)));
13127 /* Return TRUE if X references a SYMBOL_REF. */
13129 symbol_mentioned_p (rtx x
)
13134 if (GET_CODE (x
) == SYMBOL_REF
)
13137 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13138 are constant offsets, not symbols. */
13139 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13142 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13144 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13150 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13151 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
13154 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
13161 /* Return TRUE if X references a LABEL_REF. */
13163 label_mentioned_p (rtx x
)
13168 if (GET_CODE (x
) == LABEL_REF
)
13171 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13172 instruction, but they are constant offsets, not symbols. */
13173 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13176 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13177 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13183 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13184 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
13187 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
13195 tls_mentioned_p (rtx x
)
13197 switch (GET_CODE (x
))
13200 return tls_mentioned_p (XEXP (x
, 0));
13203 if (XINT (x
, 1) == UNSPEC_TLS
)
13211 /* Must not copy any rtx that uses a pc-relative address. */
13214 arm_cannot_copy_insn_p (rtx_insn
*insn
)
13216 /* The tls call insn cannot be copied, as it is paired with a data
13218 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
13221 subrtx_iterator::array_type array
;
13222 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
13224 const_rtx x
= *iter
;
13225 if (GET_CODE (x
) == UNSPEC
13226 && (XINT (x
, 1) == UNSPEC_PIC_BASE
13227 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
13234 minmax_code (rtx x
)
13236 enum rtx_code code
= GET_CODE (x
);
13249 gcc_unreachable ();
13253 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13256 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
13257 int *mask
, bool *signed_sat
)
13259 /* The high bound must be a power of two minus one. */
13260 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
13264 /* The low bound is either zero (for usat) or one less than the
13265 negation of the high bound (for ssat). */
13266 if (INTVAL (lo_bound
) == 0)
13271 *signed_sat
= false;
13276 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
13281 *signed_sat
= true;
13289 /* Return 1 if memory locations are adjacent. */
13291 adjacent_mem_locations (rtx a
, rtx b
)
13293 /* We don't guarantee to preserve the order of these memory refs. */
13294 if (volatile_refs_p (a
) || volatile_refs_p (b
))
13297 if ((REG_P (XEXP (a
, 0))
13298 || (GET_CODE (XEXP (a
, 0)) == PLUS
13299 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
13300 && (REG_P (XEXP (b
, 0))
13301 || (GET_CODE (XEXP (b
, 0)) == PLUS
13302 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
13304 HOST_WIDE_INT val0
= 0, val1
= 0;
13308 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
13310 reg0
= XEXP (XEXP (a
, 0), 0);
13311 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
13314 reg0
= XEXP (a
, 0);
13316 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
13318 reg1
= XEXP (XEXP (b
, 0), 0);
13319 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
13322 reg1
= XEXP (b
, 0);
13324 /* Don't accept any offset that will require multiple
13325 instructions to handle, since this would cause the
13326 arith_adjacentmem pattern to output an overlong sequence. */
13327 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
13330 /* Don't allow an eliminable register: register elimination can make
13331 the offset too large. */
13332 if (arm_eliminable_register (reg0
))
13335 val_diff
= val1
- val0
;
13339 /* If the target has load delay slots, then there's no benefit
13340 to using an ldm instruction unless the offset is zero and
13341 we are optimizing for size. */
13342 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
13343 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
13344 && (val_diff
== 4 || val_diff
== -4));
13347 return ((REGNO (reg0
) == REGNO (reg1
))
13348 && (val_diff
== 4 || val_diff
== -4));
13354 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13355 for load operations, false for store operations. CONSECUTIVE is true
13356 if the register numbers in the operation must be consecutive in the register
13357 bank. RETURN_PC is true if value is to be loaded in PC.
13358 The pattern we are trying to match for load is:
13359 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13360 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13363 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13366 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13367 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13368 3. If consecutive is TRUE, then for kth register being loaded,
13369 REGNO (R_dk) = REGNO (R_d0) + k.
13370 The pattern for store is similar. */
13372 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
13373 bool consecutive
, bool return_pc
)
13375 HOST_WIDE_INT count
= XVECLEN (op
, 0);
13376 rtx reg
, mem
, addr
;
13378 unsigned first_regno
;
13379 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
13381 bool addr_reg_in_reglist
= false;
13382 bool update
= false;
13387 /* If not in SImode, then registers must be consecutive
13388 (e.g., VLDM instructions for DFmode). */
13389 gcc_assert ((mode
== SImode
) || consecutive
);
13390 /* Setting return_pc for stores is illegal. */
13391 gcc_assert (!return_pc
|| load
);
13393 /* Set up the increments and the regs per val based on the mode. */
13394 reg_increment
= GET_MODE_SIZE (mode
);
13395 regs_per_val
= reg_increment
/ 4;
13396 offset_adj
= return_pc
? 1 : 0;
13399 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
13400 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
13403 /* Check if this is a write-back. */
13404 elt
= XVECEXP (op
, 0, offset_adj
);
13405 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
13411 /* The offset adjustment must be the number of registers being
13412 popped times the size of a single register. */
13413 if (!REG_P (SET_DEST (elt
))
13414 || !REG_P (XEXP (SET_SRC (elt
), 0))
13415 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
13416 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
13417 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
13418 ((count
- 1 - offset_adj
) * reg_increment
))
13422 i
= i
+ offset_adj
;
13423 base
= base
+ offset_adj
;
13424 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13425 success depends on the type: VLDM can do just one reg,
13426 LDM must do at least two. */
13427 if ((count
<= i
) && (mode
== SImode
))
13430 elt
= XVECEXP (op
, 0, i
- 1);
13431 if (GET_CODE (elt
) != SET
)
13436 reg
= SET_DEST (elt
);
13437 mem
= SET_SRC (elt
);
13441 reg
= SET_SRC (elt
);
13442 mem
= SET_DEST (elt
);
13445 if (!REG_P (reg
) || !MEM_P (mem
))
13448 regno
= REGNO (reg
);
13449 first_regno
= regno
;
13450 addr
= XEXP (mem
, 0);
13451 if (GET_CODE (addr
) == PLUS
)
13453 if (!CONST_INT_P (XEXP (addr
, 1)))
13456 offset
= INTVAL (XEXP (addr
, 1));
13457 addr
= XEXP (addr
, 0);
13463 /* Don't allow SP to be loaded unless it is also the base register. It
13464 guarantees that SP is reset correctly when an LDM instruction
13465 is interrupted. Otherwise, we might end up with a corrupt stack. */
13466 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13469 for (; i
< count
; i
++)
13471 elt
= XVECEXP (op
, 0, i
);
13472 if (GET_CODE (elt
) != SET
)
13477 reg
= SET_DEST (elt
);
13478 mem
= SET_SRC (elt
);
13482 reg
= SET_SRC (elt
);
13483 mem
= SET_DEST (elt
);
13487 || GET_MODE (reg
) != mode
13488 || REGNO (reg
) <= regno
13491 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
13492 /* Don't allow SP to be loaded unless it is also the base register. It
13493 guarantees that SP is reset correctly when an LDM instruction
13494 is interrupted. Otherwise, we might end up with a corrupt stack. */
13495 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
13497 || GET_MODE (mem
) != mode
13498 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
13499 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
13500 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13501 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13502 offset
+ (i
- base
) * reg_increment
))
13503 && (!REG_P (XEXP (mem
, 0))
13504 || offset
+ (i
- base
) * reg_increment
!= 0)))
13507 regno
= REGNO (reg
);
13508 if (regno
== REGNO (addr
))
13509 addr_reg_in_reglist
= true;
13514 if (update
&& addr_reg_in_reglist
)
13517 /* For Thumb-1, address register is always modified - either by write-back
13518 or by explicit load. If the pattern does not describe an update,
13519 then the address register must be in the list of loaded registers. */
13521 return update
|| addr_reg_in_reglist
;
13527 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13528 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13529 instruction. ADD_OFFSET is nonzero if the base address register needs
13530 to be modified with an add instruction before we can use it. */
13533 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13534 int nops
, HOST_WIDE_INT add_offset
)
13536 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13537 if the offset isn't small enough. The reason 2 ldrs are faster
13538 is because these ARMs are able to do more than one cache access
13539 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13540 whilst the ARM8 has a double bandwidth cache. This means that
13541 these cores can do both an instruction fetch and a data fetch in
13542 a single cycle, so the trick of calculating the address into a
13543 scratch register (one of the result regs) and then doing a load
13544 multiple actually becomes slower (and no smaller in code size).
13545 That is the transformation
13547 ldr rd1, [rbase + offset]
13548 ldr rd2, [rbase + offset + 4]
13552 add rd1, rbase, offset
13553 ldmia rd1, {rd1, rd2}
13555 produces worse code -- '3 cycles + any stalls on rd2' instead of
13556 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13557 access per cycle, the first sequence could never complete in less
13558 than 6 cycles, whereas the ldm sequence would only take 5 and
13559 would make better use of sequential accesses if not hitting the
13562 We cheat here and test 'arm_ld_sched' which we currently know to
13563 only be true for the ARM8, ARM9 and StrongARM. If this ever
13564 changes, then the test below needs to be reworked. */
13565 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13568 /* XScale has load-store double instructions, but they have stricter
13569 alignment requirements than load-store multiple, so we cannot
13572 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13573 the pipeline until completion.
13581 An ldr instruction takes 1-3 cycles, but does not block the
13590 Best case ldr will always win. However, the more ldr instructions
13591 we issue, the less likely we are to be able to schedule them well.
13592 Using ldr instructions also increases code size.
13594 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13595 for counts of 3 or 4 regs. */
13596 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13601 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13602 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13603 an array ORDER which describes the sequence to use when accessing the
13604 offsets that produces an ascending order. In this sequence, each
13605 offset must be larger by exactly 4 than the previous one. ORDER[0]
13606 must have been filled in with the lowest offset by the caller.
13607 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13608 we use to verify that ORDER produces an ascending order of registers.
13609 Return true if it was possible to construct such an order, false if
13613 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13614 int *unsorted_regs
)
13617 for (i
= 1; i
< nops
; i
++)
13621 order
[i
] = order
[i
- 1];
13622 for (j
= 0; j
< nops
; j
++)
13623 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13625 /* We must find exactly one offset that is higher than the
13626 previous one by 4. */
13627 if (order
[i
] != order
[i
- 1])
13631 if (order
[i
] == order
[i
- 1])
13633 /* The register numbers must be ascending. */
13634 if (unsorted_regs
!= NULL
13635 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13641 /* Used to determine in a peephole whether a sequence of load
13642 instructions can be changed into a load-multiple instruction.
13643 NOPS is the number of separate load instructions we are examining. The
13644 first NOPS entries in OPERANDS are the destination registers, the
13645 next NOPS entries are memory operands. If this function is
13646 successful, *BASE is set to the common base register of the memory
13647 accesses; *LOAD_OFFSET is set to the first memory location's offset
13648 from that base register.
13649 REGS is an array filled in with the destination register numbers.
13650 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13651 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13652 the sequence of registers in REGS matches the loads from ascending memory
13653 locations, and the function verifies that the register numbers are
13654 themselves ascending. If CHECK_REGS is false, the register numbers
13655 are stored in the order they are found in the operands. */
13657 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13658 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13660 int unsorted_regs
[MAX_LDM_STM_OPS
];
13661 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13662 int order
[MAX_LDM_STM_OPS
];
13663 rtx base_reg_rtx
= NULL
;
13667 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13668 easily extended if required. */
13669 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13671 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13673 /* Loop over the operands and check that the memory references are
13674 suitable (i.e. immediate offsets from the same base register). At
13675 the same time, extract the target register, and the memory
13677 for (i
= 0; i
< nops
; i
++)
13682 /* Convert a subreg of a mem into the mem itself. */
13683 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13684 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13686 gcc_assert (MEM_P (operands
[nops
+ i
]));
13688 /* Don't reorder volatile memory references; it doesn't seem worth
13689 looking for the case where the order is ok anyway. */
13690 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13693 offset
= const0_rtx
;
13695 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13696 || (GET_CODE (reg
) == SUBREG
13697 && REG_P (reg
= SUBREG_REG (reg
))))
13698 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13699 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13700 || (GET_CODE (reg
) == SUBREG
13701 && REG_P (reg
= SUBREG_REG (reg
))))
13702 && (CONST_INT_P (offset
13703 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13707 base_reg
= REGNO (reg
);
13708 base_reg_rtx
= reg
;
13709 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13712 else if (base_reg
!= (int) REGNO (reg
))
13713 /* Not addressed from the same base register. */
13716 unsorted_regs
[i
] = (REG_P (operands
[i
])
13717 ? REGNO (operands
[i
])
13718 : REGNO (SUBREG_REG (operands
[i
])));
13720 /* If it isn't an integer register, or if it overwrites the
13721 base register but isn't the last insn in the list, then
13722 we can't do this. */
13723 if (unsorted_regs
[i
] < 0
13724 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13725 || unsorted_regs
[i
] > 14
13726 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13729 /* Don't allow SP to be loaded unless it is also the base
13730 register. It guarantees that SP is reset correctly when
13731 an LDM instruction is interrupted. Otherwise, we might
13732 end up with a corrupt stack. */
13733 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13736 unsorted_offsets
[i
] = INTVAL (offset
);
13737 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13741 /* Not a suitable memory address. */
13745 /* All the useful information has now been extracted from the
13746 operands into unsorted_regs and unsorted_offsets; additionally,
13747 order[0] has been set to the lowest offset in the list. Sort
13748 the offsets into order, verifying that they are adjacent, and
13749 check that the register numbers are ascending. */
13750 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13751 check_regs
? unsorted_regs
: NULL
))
13755 memcpy (saved_order
, order
, sizeof order
);
13761 for (i
= 0; i
< nops
; i
++)
13762 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13764 *load_offset
= unsorted_offsets
[order
[0]];
13768 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13771 if (unsorted_offsets
[order
[0]] == 0)
13772 ldm_case
= 1; /* ldmia */
13773 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13774 ldm_case
= 2; /* ldmib */
13775 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13776 ldm_case
= 3; /* ldmda */
13777 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13778 ldm_case
= 4; /* ldmdb */
13779 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13780 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13785 if (!multiple_operation_profitable_p (false, nops
,
13787 ? unsorted_offsets
[order
[0]] : 0))
13793 /* Used to determine in a peephole whether a sequence of store instructions can
13794 be changed into a store-multiple instruction.
13795 NOPS is the number of separate store instructions we are examining.
13796 NOPS_TOTAL is the total number of instructions recognized by the peephole
13798 The first NOPS entries in OPERANDS are the source registers, the next
13799 NOPS entries are memory operands. If this function is successful, *BASE is
13800 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13801 to the first memory location's offset from that base register. REGS is an
13802 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13803 likewise filled with the corresponding rtx's.
13804 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13805 numbers to an ascending order of stores.
13806 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13807 from ascending memory locations, and the function verifies that the register
13808 numbers are themselves ascending. If CHECK_REGS is false, the register
13809 numbers are stored in the order they are found in the operands. */
13811 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13812 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13813 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13815 int unsorted_regs
[MAX_LDM_STM_OPS
];
13816 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13817 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13818 int order
[MAX_LDM_STM_OPS
];
13820 rtx base_reg_rtx
= NULL
;
13823 /* Write back of base register is currently only supported for Thumb 1. */
13824 int base_writeback
= TARGET_THUMB1
;
13826 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13827 easily extended if required. */
13828 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13830 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13832 /* Loop over the operands and check that the memory references are
13833 suitable (i.e. immediate offsets from the same base register). At
13834 the same time, extract the target register, and the memory
13836 for (i
= 0; i
< nops
; i
++)
13841 /* Convert a subreg of a mem into the mem itself. */
13842 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13843 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13845 gcc_assert (MEM_P (operands
[nops
+ i
]));
13847 /* Don't reorder volatile memory references; it doesn't seem worth
13848 looking for the case where the order is ok anyway. */
13849 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13852 offset
= const0_rtx
;
13854 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13855 || (GET_CODE (reg
) == SUBREG
13856 && REG_P (reg
= SUBREG_REG (reg
))))
13857 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13858 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13859 || (GET_CODE (reg
) == SUBREG
13860 && REG_P (reg
= SUBREG_REG (reg
))))
13861 && (CONST_INT_P (offset
13862 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13864 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13865 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13866 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13870 base_reg
= REGNO (reg
);
13871 base_reg_rtx
= reg
;
13872 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13875 else if (base_reg
!= (int) REGNO (reg
))
13876 /* Not addressed from the same base register. */
13879 /* If it isn't an integer register, then we can't do this. */
13880 if (unsorted_regs
[i
] < 0
13881 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13882 /* The effects are unpredictable if the base register is
13883 both updated and stored. */
13884 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13885 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13886 || unsorted_regs
[i
] > 14)
13889 unsorted_offsets
[i
] = INTVAL (offset
);
13890 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13894 /* Not a suitable memory address. */
13898 /* All the useful information has now been extracted from the
13899 operands into unsorted_regs and unsorted_offsets; additionally,
13900 order[0] has been set to the lowest offset in the list. Sort
13901 the offsets into order, verifying that they are adjacent, and
13902 check that the register numbers are ascending. */
13903 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13904 check_regs
? unsorted_regs
: NULL
))
13908 memcpy (saved_order
, order
, sizeof order
);
13914 for (i
= 0; i
< nops
; i
++)
13916 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13918 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13921 *load_offset
= unsorted_offsets
[order
[0]];
13925 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13928 if (unsorted_offsets
[order
[0]] == 0)
13929 stm_case
= 1; /* stmia */
13930 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13931 stm_case
= 2; /* stmib */
13932 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13933 stm_case
= 3; /* stmda */
13934 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13935 stm_case
= 4; /* stmdb */
13939 if (!multiple_operation_profitable_p (false, nops
, 0))
13945 /* Routines for use in generating RTL. */
13947 /* Generate a load-multiple instruction. COUNT is the number of loads in
13948 the instruction; REGS and MEMS are arrays containing the operands.
13949 BASEREG is the base register to be used in addressing the memory operands.
13950 WBACK_OFFSET is nonzero if the instruction should update the base
13954 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13955 HOST_WIDE_INT wback_offset
)
13960 if (!multiple_operation_profitable_p (false, count
, 0))
13966 for (i
= 0; i
< count
; i
++)
13967 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13969 if (wback_offset
!= 0)
13970 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13972 seq
= get_insns ();
13978 result
= gen_rtx_PARALLEL (VOIDmode
,
13979 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13980 if (wback_offset
!= 0)
13982 XVECEXP (result
, 0, 0)
13983 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13988 for (j
= 0; i
< count
; i
++, j
++)
13989 XVECEXP (result
, 0, i
)
13990 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13995 /* Generate a store-multiple instruction. COUNT is the number of stores in
13996 the instruction; REGS and MEMS are arrays containing the operands.
13997 BASEREG is the base register to be used in addressing the memory operands.
13998 WBACK_OFFSET is nonzero if the instruction should update the base
14002 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
14003 HOST_WIDE_INT wback_offset
)
14008 if (GET_CODE (basereg
) == PLUS
)
14009 basereg
= XEXP (basereg
, 0);
14011 if (!multiple_operation_profitable_p (false, count
, 0))
14017 for (i
= 0; i
< count
; i
++)
14018 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
14020 if (wback_offset
!= 0)
14021 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14023 seq
= get_insns ();
14029 result
= gen_rtx_PARALLEL (VOIDmode
,
14030 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
14031 if (wback_offset
!= 0)
14033 XVECEXP (result
, 0, 0)
14034 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
14039 for (j
= 0; i
< count
; i
++, j
++)
14040 XVECEXP (result
, 0, i
)
14041 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
14046 /* Generate either a load-multiple or a store-multiple instruction. This
14047 function can be used in situations where we can start with a single MEM
14048 rtx and adjust its address upwards.
14049 COUNT is the number of operations in the instruction, not counting a
14050 possible update of the base register. REGS is an array containing the
14052 BASEREG is the base register to be used in addressing the memory operands,
14053 which are constructed from BASEMEM.
14054 WRITE_BACK specifies whether the generated instruction should include an
14055 update of the base register.
14056 OFFSETP is used to pass an offset to and from this function; this offset
14057 is not used when constructing the address (instead BASEMEM should have an
14058 appropriate offset in its address), it is used only for setting
14059 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14062 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
14063 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
14065 rtx mems
[MAX_LDM_STM_OPS
];
14066 HOST_WIDE_INT offset
= *offsetp
;
14069 gcc_assert (count
<= MAX_LDM_STM_OPS
);
14071 if (GET_CODE (basereg
) == PLUS
)
14072 basereg
= XEXP (basereg
, 0);
14074 for (i
= 0; i
< count
; i
++)
14076 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
14077 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
14085 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
14086 write_back
? 4 * count
: 0);
14088 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
14089 write_back
? 4 * count
: 0);
14093 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14094 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14096 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
14101 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
14102 rtx basemem
, HOST_WIDE_INT
*offsetp
)
14104 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
14108 /* Called from a peephole2 expander to turn a sequence of loads into an
14109 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14110 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14111 is true if we can reorder the registers because they are used commutatively
14113 Returns true iff we could generate a new instruction. */
14116 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
14118 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14119 rtx mems
[MAX_LDM_STM_OPS
];
14120 int i
, j
, base_reg
;
14122 HOST_WIDE_INT offset
;
14123 int write_back
= FALSE
;
14127 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
14128 &base_reg
, &offset
, !sort_regs
);
14134 for (i
= 0; i
< nops
- 1; i
++)
14135 for (j
= i
+ 1; j
< nops
; j
++)
14136 if (regs
[i
] > regs
[j
])
14142 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14146 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
14147 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
14153 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
14154 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
14156 if (!TARGET_THUMB1
)
14158 base_reg
= regs
[0];
14159 base_reg_rtx
= newbase
;
14163 for (i
= 0; i
< nops
; i
++)
14165 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14166 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14169 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14170 write_back
? offset
+ i
* 4 : 0));
14174 /* Called from a peephole2 expander to turn a sequence of stores into an
14175 STM instruction. OPERANDS are the operands found by the peephole matcher;
14176 NOPS indicates how many separate stores we are trying to combine.
14177 Returns true iff we could generate a new instruction. */
14180 gen_stm_seq (rtx
*operands
, int nops
)
14183 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14184 rtx mems
[MAX_LDM_STM_OPS
];
14187 HOST_WIDE_INT offset
;
14188 int write_back
= FALSE
;
14191 bool base_reg_dies
;
14193 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
14194 mem_order
, &base_reg
, &offset
, true);
14199 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14201 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
14204 gcc_assert (base_reg_dies
);
14210 gcc_assert (base_reg_dies
);
14211 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14215 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14217 for (i
= 0; i
< nops
; i
++)
14219 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14220 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14223 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
14224 write_back
? offset
+ i
* 4 : 0));
14228 /* Called from a peephole2 expander to turn a sequence of stores that are
14229 preceded by constant loads into an STM instruction. OPERANDS are the
14230 operands found by the peephole matcher; NOPS indicates how many
14231 separate stores we are trying to combine; there are 2 * NOPS
14232 instructions in the peephole.
14233 Returns true iff we could generate a new instruction. */
14236 gen_const_stm_seq (rtx
*operands
, int nops
)
14238 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
14239 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
14240 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
14241 rtx mems
[MAX_LDM_STM_OPS
];
14244 HOST_WIDE_INT offset
;
14245 int write_back
= FALSE
;
14248 bool base_reg_dies
;
14250 HARD_REG_SET allocated
;
14252 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
14253 mem_order
, &base_reg
, &offset
, false);
14258 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
14260 /* If the same register is used more than once, try to find a free
14262 CLEAR_HARD_REG_SET (allocated
);
14263 for (i
= 0; i
< nops
; i
++)
14265 for (j
= i
+ 1; j
< nops
; j
++)
14266 if (regs
[i
] == regs
[j
])
14268 rtx t
= peep2_find_free_register (0, nops
* 2,
14269 TARGET_THUMB1
? "l" : "r",
14270 SImode
, &allocated
);
14274 regs
[i
] = REGNO (t
);
14278 /* Compute an ordering that maps the register numbers to an ascending
14281 for (i
= 0; i
< nops
; i
++)
14282 if (regs
[i
] < regs
[reg_order
[0]])
14285 for (i
= 1; i
< nops
; i
++)
14287 int this_order
= reg_order
[i
- 1];
14288 for (j
= 0; j
< nops
; j
++)
14289 if (regs
[j
] > regs
[reg_order
[i
- 1]]
14290 && (this_order
== reg_order
[i
- 1]
14291 || regs
[j
] < regs
[this_order
]))
14293 reg_order
[i
] = this_order
;
14296 /* Ensure that registers that must be live after the instruction end
14297 up with the correct value. */
14298 for (i
= 0; i
< nops
; i
++)
14300 int this_order
= reg_order
[i
];
14301 if ((this_order
!= mem_order
[i
]
14302 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
14303 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
14307 /* Load the constants. */
14308 for (i
= 0; i
< nops
; i
++)
14310 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
14311 sorted_regs
[i
] = regs
[reg_order
[i
]];
14312 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
14315 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
14317 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
14320 gcc_assert (base_reg_dies
);
14326 gcc_assert (base_reg_dies
);
14327 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
14331 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
14333 for (i
= 0; i
< nops
; i
++)
14335 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
14336 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
14339 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
14340 write_back
? offset
+ i
* 4 : 0));
14344 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14345 unaligned copies on processors which support unaligned semantics for those
14346 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14347 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14348 An interleave factor of 1 (the minimum) will perform no interleaving.
14349 Load/store multiple are used for aligned addresses where possible. */
14352 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
14353 HOST_WIDE_INT length
,
14354 unsigned int interleave_factor
)
14356 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
14357 int *regnos
= XALLOCAVEC (int, interleave_factor
);
14358 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
14359 HOST_WIDE_INT i
, j
;
14360 HOST_WIDE_INT remaining
= length
, words
;
14361 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
14363 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
14364 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
14365 HOST_WIDE_INT srcoffset
, dstoffset
;
14366 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
14369 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
14371 /* Use hard registers if we have aligned source or destination so we can use
14372 load/store multiple with contiguous registers. */
14373 if (dst_aligned
|| src_aligned
)
14374 for (i
= 0; i
< interleave_factor
; i
++)
14375 regs
[i
] = gen_rtx_REG (SImode
, i
);
14377 for (i
= 0; i
< interleave_factor
; i
++)
14378 regs
[i
] = gen_reg_rtx (SImode
);
14380 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
14381 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
14383 srcoffset
= dstoffset
= 0;
14385 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14386 For copying the last bytes we want to subtract this offset again. */
14387 src_autoinc
= dst_autoinc
= 0;
14389 for (i
= 0; i
< interleave_factor
; i
++)
14392 /* Copy BLOCK_SIZE_BYTES chunks. */
14394 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
14397 if (src_aligned
&& interleave_factor
> 1)
14399 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
14400 TRUE
, srcbase
, &srcoffset
));
14401 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14405 for (j
= 0; j
< interleave_factor
; j
++)
14407 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
14409 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14410 srcoffset
+ j
* UNITS_PER_WORD
);
14411 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14413 srcoffset
+= block_size_bytes
;
14417 if (dst_aligned
&& interleave_factor
> 1)
14419 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
14420 TRUE
, dstbase
, &dstoffset
));
14421 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
14425 for (j
= 0; j
< interleave_factor
; j
++)
14427 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
14429 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14430 dstoffset
+ j
* UNITS_PER_WORD
);
14431 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14433 dstoffset
+= block_size_bytes
;
14436 remaining
-= block_size_bytes
;
14439 /* Copy any whole words left (note these aren't interleaved with any
14440 subsequent halfword/byte load/stores in the interests of simplicity). */
14442 words
= remaining
/ UNITS_PER_WORD
;
14444 gcc_assert (words
< interleave_factor
);
14446 if (src_aligned
&& words
> 1)
14448 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
14450 src_autoinc
+= UNITS_PER_WORD
* words
;
14454 for (j
= 0; j
< words
; j
++)
14456 addr
= plus_constant (Pmode
, src
,
14457 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
14458 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
14459 srcoffset
+ j
* UNITS_PER_WORD
);
14461 emit_move_insn (regs
[j
], mem
);
14463 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
14465 srcoffset
+= words
* UNITS_PER_WORD
;
14468 if (dst_aligned
&& words
> 1)
14470 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
14472 dst_autoinc
+= words
* UNITS_PER_WORD
;
14476 for (j
= 0; j
< words
; j
++)
14478 addr
= plus_constant (Pmode
, dst
,
14479 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
14480 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
14481 dstoffset
+ j
* UNITS_PER_WORD
);
14483 emit_move_insn (mem
, regs
[j
]);
14485 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
14487 dstoffset
+= words
* UNITS_PER_WORD
;
14490 remaining
-= words
* UNITS_PER_WORD
;
14492 gcc_assert (remaining
< 4);
14494 /* Copy a halfword if necessary. */
14496 if (remaining
>= 2)
14498 halfword_tmp
= gen_reg_rtx (SImode
);
14500 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14501 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
14502 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14504 /* Either write out immediately, or delay until we've loaded the last
14505 byte, depending on interleave factor. */
14506 if (interleave_factor
== 1)
14508 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14509 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14510 emit_insn (gen_unaligned_storehi (mem
,
14511 gen_lowpart (HImode
, halfword_tmp
)));
14512 halfword_tmp
= NULL
;
14520 gcc_assert (remaining
< 2);
14522 /* Copy last byte. */
14524 if ((remaining
& 1) != 0)
14526 byte_tmp
= gen_reg_rtx (SImode
);
14528 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14529 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14530 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14532 if (interleave_factor
== 1)
14534 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14535 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14536 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14545 /* Store last halfword if we haven't done so already. */
14549 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14550 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14551 emit_insn (gen_unaligned_storehi (mem
,
14552 gen_lowpart (HImode
, halfword_tmp
)));
14556 /* Likewise for last byte. */
14560 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14561 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14562 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14566 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14569 /* From mips_adjust_block_mem:
14571 Helper function for doing a loop-based block operation on memory
14572 reference MEM. Each iteration of the loop will operate on LENGTH
14575 Create a new base register for use within the loop and point it to
14576 the start of MEM. Create a new memory reference that uses this
14577 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14580 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14583 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14585 /* Although the new mem does not refer to a known location,
14586 it does keep up to LENGTH bytes of alignment. */
14587 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14588 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14591 /* From mips_block_move_loop:
14593 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14594 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14595 the memory regions do not overlap. */
14598 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14599 unsigned int interleave_factor
,
14600 HOST_WIDE_INT bytes_per_iter
)
14602 rtx src_reg
, dest_reg
, final_src
, test
;
14603 HOST_WIDE_INT leftover
;
14605 leftover
= length
% bytes_per_iter
;
14606 length
-= leftover
;
14608 /* Create registers and memory references for use within the loop. */
14609 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14610 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14612 /* Calculate the value that SRC_REG should have after the last iteration of
14614 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14615 0, 0, OPTAB_WIDEN
);
14617 /* Emit the start of the loop. */
14618 rtx_code_label
*label
= gen_label_rtx ();
14619 emit_label (label
);
14621 /* Emit the loop body. */
14622 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14623 interleave_factor
);
14625 /* Move on to the next block. */
14626 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14627 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14629 /* Emit the loop condition. */
14630 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14631 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14633 /* Mop up any left-over bytes. */
14635 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14638 /* Emit a block move when either the source or destination is unaligned (not
14639 aligned to a four-byte boundary). This may need further tuning depending on
14640 core type, optimize_size setting, etc. */
14643 arm_movmemqi_unaligned (rtx
*operands
)
14645 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14649 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14650 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14651 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14652 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14653 or dst_aligned though: allow more interleaving in those cases since the
14654 resulting code can be smaller. */
14655 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14656 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14659 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14660 interleave_factor
, bytes_per_iter
);
14662 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14663 interleave_factor
);
14667 /* Note that the loop created by arm_block_move_unaligned_loop may be
14668 subject to loop unrolling, which makes tuning this condition a little
14671 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14673 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14680 arm_gen_movmemqi (rtx
*operands
)
14682 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14683 HOST_WIDE_INT srcoffset
, dstoffset
;
14685 rtx src
, dst
, srcbase
, dstbase
;
14686 rtx part_bytes_reg
= NULL
;
14689 if (!CONST_INT_P (operands
[2])
14690 || !CONST_INT_P (operands
[3])
14691 || INTVAL (operands
[2]) > 64)
14694 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14695 return arm_movmemqi_unaligned (operands
);
14697 if (INTVAL (operands
[3]) & 3)
14700 dstbase
= operands
[0];
14701 srcbase
= operands
[1];
14703 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14704 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14706 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14707 out_words_to_go
= INTVAL (operands
[2]) / 4;
14708 last_bytes
= INTVAL (operands
[2]) & 3;
14709 dstoffset
= srcoffset
= 0;
14711 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14712 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14714 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14716 if (in_words_to_go
> 4)
14717 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14718 TRUE
, srcbase
, &srcoffset
));
14720 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14721 src
, FALSE
, srcbase
,
14724 if (out_words_to_go
)
14726 if (out_words_to_go
> 4)
14727 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14728 TRUE
, dstbase
, &dstoffset
));
14729 else if (out_words_to_go
!= 1)
14730 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14731 out_words_to_go
, dst
,
14734 dstbase
, &dstoffset
));
14737 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14738 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14739 if (last_bytes
!= 0)
14741 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14747 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14748 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14751 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14752 if (out_words_to_go
)
14756 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14757 sreg
= copy_to_reg (mem
);
14759 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14760 emit_move_insn (mem
, sreg
);
14763 gcc_assert (!in_words_to_go
); /* Sanity check */
14766 if (in_words_to_go
)
14768 gcc_assert (in_words_to_go
> 0);
14770 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14771 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14774 gcc_assert (!last_bytes
|| part_bytes_reg
);
14776 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14778 rtx tmp
= gen_reg_rtx (SImode
);
14780 /* The bytes we want are in the top end of the word. */
14781 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14782 GEN_INT (8 * (4 - last_bytes
))));
14783 part_bytes_reg
= tmp
;
14787 mem
= adjust_automodify_address (dstbase
, QImode
,
14788 plus_constant (Pmode
, dst
,
14790 dstoffset
+ last_bytes
- 1);
14791 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14795 tmp
= gen_reg_rtx (SImode
);
14796 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14797 part_bytes_reg
= tmp
;
14804 if (last_bytes
> 1)
14806 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14807 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14811 rtx tmp
= gen_reg_rtx (SImode
);
14812 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14813 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14814 part_bytes_reg
= tmp
;
14821 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14822 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14829 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14832 next_consecutive_mem (rtx mem
)
14834 machine_mode mode
= GET_MODE (mem
);
14835 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14836 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14838 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14841 /* Copy using LDRD/STRD instructions whenever possible.
14842 Returns true upon success. */
14844 gen_movmem_ldrd_strd (rtx
*operands
)
14846 unsigned HOST_WIDE_INT len
;
14847 HOST_WIDE_INT align
;
14848 rtx src
, dst
, base
;
14850 bool src_aligned
, dst_aligned
;
14851 bool src_volatile
, dst_volatile
;
14853 gcc_assert (CONST_INT_P (operands
[2]));
14854 gcc_assert (CONST_INT_P (operands
[3]));
14856 len
= UINTVAL (operands
[2]);
14860 /* Maximum alignment we can assume for both src and dst buffers. */
14861 align
= INTVAL (operands
[3]);
14863 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14866 /* Place src and dst addresses in registers
14867 and update the corresponding mem rtx. */
14869 dst_volatile
= MEM_VOLATILE_P (dst
);
14870 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14871 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14872 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14875 src_volatile
= MEM_VOLATILE_P (src
);
14876 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14877 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14878 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14880 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14883 if (src_volatile
|| dst_volatile
)
14886 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14887 if (!(dst_aligned
|| src_aligned
))
14888 return arm_gen_movmemqi (operands
);
14890 src
= adjust_address (src
, DImode
, 0);
14891 dst
= adjust_address (dst
, DImode
, 0);
14895 reg0
= gen_reg_rtx (DImode
);
14897 emit_move_insn (reg0
, src
);
14899 emit_insn (gen_unaligned_loaddi (reg0
, src
));
14902 emit_move_insn (dst
, reg0
);
14904 emit_insn (gen_unaligned_storedi (dst
, reg0
));
14906 src
= next_consecutive_mem (src
);
14907 dst
= next_consecutive_mem (dst
);
14910 gcc_assert (len
< 8);
14913 /* More than a word but less than a double-word to copy. Copy a word. */
14914 reg0
= gen_reg_rtx (SImode
);
14915 src
= adjust_address (src
, SImode
, 0);
14916 dst
= adjust_address (dst
, SImode
, 0);
14918 emit_move_insn (reg0
, src
);
14920 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14923 emit_move_insn (dst
, reg0
);
14925 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14927 src
= next_consecutive_mem (src
);
14928 dst
= next_consecutive_mem (dst
);
14935 /* Copy the remaining bytes. */
14938 dst
= adjust_address (dst
, HImode
, 0);
14939 src
= adjust_address (src
, HImode
, 0);
14940 reg0
= gen_reg_rtx (SImode
);
14942 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14944 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14947 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14949 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14951 src
= next_consecutive_mem (src
);
14952 dst
= next_consecutive_mem (dst
);
14957 dst
= adjust_address (dst
, QImode
, 0);
14958 src
= adjust_address (src
, QImode
, 0);
14959 reg0
= gen_reg_rtx (QImode
);
14960 emit_move_insn (reg0
, src
);
14961 emit_move_insn (dst
, reg0
);
14965 /* Select a dominance comparison mode if possible for a test of the general
14966 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14967 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14968 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14969 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14970 In all cases OP will be either EQ or NE, but we don't need to know which
14971 here. If we are unable to support a dominance comparison we return
14972 CC mode. This will then fail to match for the RTL expressions that
14973 generate this call. */
14975 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14977 enum rtx_code cond1
, cond2
;
14980 /* Currently we will probably get the wrong result if the individual
14981 comparisons are not simple. This also ensures that it is safe to
14982 reverse a comparison if necessary. */
14983 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14985 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14989 /* The if_then_else variant of this tests the second condition if the
14990 first passes, but is true if the first fails. Reverse the first
14991 condition to get a true "inclusive-or" expression. */
14992 if (cond_or
== DOM_CC_NX_OR_Y
)
14993 cond1
= reverse_condition (cond1
);
14995 /* If the comparisons are not equal, and one doesn't dominate the other,
14996 then we can't do this. */
14998 && !comparison_dominates_p (cond1
, cond2
)
14999 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
15003 std::swap (cond1
, cond2
);
15008 if (cond_or
== DOM_CC_X_AND_Y
)
15013 case EQ
: return CC_DEQmode
;
15014 case LE
: return CC_DLEmode
;
15015 case LEU
: return CC_DLEUmode
;
15016 case GE
: return CC_DGEmode
;
15017 case GEU
: return CC_DGEUmode
;
15018 default: gcc_unreachable ();
15022 if (cond_or
== DOM_CC_X_AND_Y
)
15034 gcc_unreachable ();
15038 if (cond_or
== DOM_CC_X_AND_Y
)
15050 gcc_unreachable ();
15054 if (cond_or
== DOM_CC_X_AND_Y
)
15055 return CC_DLTUmode
;
15060 return CC_DLTUmode
;
15062 return CC_DLEUmode
;
15066 gcc_unreachable ();
15070 if (cond_or
== DOM_CC_X_AND_Y
)
15071 return CC_DGTUmode
;
15076 return CC_DGTUmode
;
15078 return CC_DGEUmode
;
15082 gcc_unreachable ();
15085 /* The remaining cases only occur when both comparisons are the
15088 gcc_assert (cond1
== cond2
);
15092 gcc_assert (cond1
== cond2
);
15096 gcc_assert (cond1
== cond2
);
15100 gcc_assert (cond1
== cond2
);
15101 return CC_DLEUmode
;
15104 gcc_assert (cond1
== cond2
);
15105 return CC_DGEUmode
;
15108 gcc_unreachable ();
15113 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
15115 /* All floating point compares return CCFP if it is an equality
15116 comparison, and CCFPE otherwise. */
15117 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
15140 gcc_unreachable ();
15144 /* A compare with a shifted operand. Because of canonicalization, the
15145 comparison will have to be swapped when we emit the assembler. */
15146 if (GET_MODE (y
) == SImode
15147 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15148 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15149 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
15150 || GET_CODE (x
) == ROTATERT
))
15153 /* This operation is performed swapped, but since we only rely on the Z
15154 flag we don't need an additional mode. */
15155 if (GET_MODE (y
) == SImode
15156 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
15157 && GET_CODE (x
) == NEG
15158 && (op
== EQ
|| op
== NE
))
15161 /* This is a special case that is used by combine to allow a
15162 comparison of a shifted byte load to be split into a zero-extend
15163 followed by a comparison of the shifted integer (only valid for
15164 equalities and unsigned inequalities). */
15165 if (GET_MODE (x
) == SImode
15166 && GET_CODE (x
) == ASHIFT
15167 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
15168 && GET_CODE (XEXP (x
, 0)) == SUBREG
15169 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
15170 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
15171 && (op
== EQ
|| op
== NE
15172 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
15173 && CONST_INT_P (y
))
15176 /* A construct for a conditional compare, if the false arm contains
15177 0, then both conditions must be true, otherwise either condition
15178 must be true. Not all conditions are possible, so CCmode is
15179 returned if it can't be done. */
15180 if (GET_CODE (x
) == IF_THEN_ELSE
15181 && (XEXP (x
, 2) == const0_rtx
15182 || XEXP (x
, 2) == const1_rtx
)
15183 && COMPARISON_P (XEXP (x
, 0))
15184 && COMPARISON_P (XEXP (x
, 1)))
15185 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15186 INTVAL (XEXP (x
, 2)));
15188 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15189 if (GET_CODE (x
) == AND
15190 && (op
== EQ
|| op
== NE
)
15191 && COMPARISON_P (XEXP (x
, 0))
15192 && COMPARISON_P (XEXP (x
, 1)))
15193 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15196 if (GET_CODE (x
) == IOR
15197 && (op
== EQ
|| op
== NE
)
15198 && COMPARISON_P (XEXP (x
, 0))
15199 && COMPARISON_P (XEXP (x
, 1)))
15200 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
15203 /* An operation (on Thumb) where we want to test for a single bit.
15204 This is done by shifting that bit up into the top bit of a
15205 scratch register; we can then branch on the sign bit. */
15207 && GET_MODE (x
) == SImode
15208 && (op
== EQ
|| op
== NE
)
15209 && GET_CODE (x
) == ZERO_EXTRACT
15210 && XEXP (x
, 1) == const1_rtx
)
15213 /* An operation that sets the condition codes as a side-effect, the
15214 V flag is not set correctly, so we can only use comparisons where
15215 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15217 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15218 if (GET_MODE (x
) == SImode
15220 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
15221 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
15222 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
15223 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
15224 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
15225 || GET_CODE (x
) == LSHIFTRT
15226 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
15227 || GET_CODE (x
) == ROTATERT
15228 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
15229 return CC_NOOVmode
;
15231 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
15234 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
15235 && GET_CODE (x
) == PLUS
15236 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
15239 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
15245 /* A DImode comparison against zero can be implemented by
15246 or'ing the two halves together. */
15247 if (y
== const0_rtx
)
15250 /* We can do an equality test in three Thumb instructions. */
15260 /* DImode unsigned comparisons can be implemented by cmp +
15261 cmpeq without a scratch register. Not worth doing in
15272 /* DImode signed and unsigned comparisons can be implemented
15273 by cmp + sbcs with a scratch register, but that does not
15274 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15275 gcc_assert (op
!= EQ
&& op
!= NE
);
15279 gcc_unreachable ();
15283 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
15284 return GET_MODE (x
);
15289 /* X and Y are two things to compare using CODE. Emit the compare insn and
15290 return the rtx for register 0 in the proper mode. FP means this is a
15291 floating point compare: I don't think that it is needed on the arm. */
15293 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
15297 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
15299 /* We might have X as a constant, Y as a register because of the predicates
15300 used for cmpdi. If so, force X to a register here. */
15301 if (dimode_comparison
&& !REG_P (x
))
15302 x
= force_reg (DImode
, x
);
15304 mode
= SELECT_CC_MODE (code
, x
, y
);
15305 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
15307 if (dimode_comparison
15308 && mode
!= CC_CZmode
)
15312 /* To compare two non-zero values for equality, XOR them and
15313 then compare against zero. Not used for ARM mode; there
15314 CC_CZmode is cheaper. */
15315 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
15317 gcc_assert (!reload_completed
);
15318 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
15322 /* A scratch register is required. */
15323 if (reload_completed
)
15324 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
15326 scratch
= gen_rtx_SCRATCH (SImode
);
15328 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
15329 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15330 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
15333 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
15338 /* Generate a sequence of insns that will generate the correct return
15339 address mask depending on the physical architecture that the program
15342 arm_gen_return_addr_mask (void)
15344 rtx reg
= gen_reg_rtx (Pmode
);
15346 emit_insn (gen_return_addr_mask (reg
));
15351 arm_reload_in_hi (rtx
*operands
)
15353 rtx ref
= operands
[1];
15355 HOST_WIDE_INT offset
= 0;
15357 if (GET_CODE (ref
) == SUBREG
)
15359 offset
= SUBREG_BYTE (ref
);
15360 ref
= SUBREG_REG (ref
);
15365 /* We have a pseudo which has been spilt onto the stack; there
15366 are two cases here: the first where there is a simple
15367 stack-slot replacement and a second where the stack-slot is
15368 out of range, or is used as a subreg. */
15369 if (reg_equiv_mem (REGNO (ref
)))
15371 ref
= reg_equiv_mem (REGNO (ref
));
15372 base
= find_replacement (&XEXP (ref
, 0));
15375 /* The slot is out of range, or was dressed up in a SUBREG. */
15376 base
= reg_equiv_address (REGNO (ref
));
15379 base
= find_replacement (&XEXP (ref
, 0));
15381 /* Handle the case where the address is too complex to be offset by 1. */
15382 if (GET_CODE (base
) == MINUS
15383 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15385 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15387 emit_set_insn (base_plus
, base
);
15390 else if (GET_CODE (base
) == PLUS
)
15392 /* The addend must be CONST_INT, or we would have dealt with it above. */
15393 HOST_WIDE_INT hi
, lo
;
15395 offset
+= INTVAL (XEXP (base
, 1));
15396 base
= XEXP (base
, 0);
15398 /* Rework the address into a legal sequence of insns. */
15399 /* Valid range for lo is -4095 -> 4095 */
15402 : -((-offset
) & 0xfff));
15404 /* Corner case, if lo is the max offset then we would be out of range
15405 once we have added the additional 1 below, so bump the msb into the
15406 pre-loading insn(s). */
15410 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15411 ^ (HOST_WIDE_INT
) 0x80000000)
15412 - (HOST_WIDE_INT
) 0x80000000);
15414 gcc_assert (hi
+ lo
== offset
);
15418 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15420 /* Get the base address; addsi3 knows how to handle constants
15421 that require more than one insn. */
15422 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15428 /* Operands[2] may overlap operands[0] (though it won't overlap
15429 operands[1]), that's why we asked for a DImode reg -- so we can
15430 use the bit that does not overlap. */
15431 if (REGNO (operands
[2]) == REGNO (operands
[0]))
15432 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15434 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15436 emit_insn (gen_zero_extendqisi2 (scratch
,
15437 gen_rtx_MEM (QImode
,
15438 plus_constant (Pmode
, base
,
15440 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15441 gen_rtx_MEM (QImode
,
15442 plus_constant (Pmode
, base
,
15444 if (!BYTES_BIG_ENDIAN
)
15445 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15446 gen_rtx_IOR (SImode
,
15449 gen_rtx_SUBREG (SImode
, operands
[0], 0),
15453 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
15454 gen_rtx_IOR (SImode
,
15455 gen_rtx_ASHIFT (SImode
, scratch
,
15457 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
15460 /* Handle storing a half-word to memory during reload by synthesizing as two
15461 byte stores. Take care not to clobber the input values until after we
15462 have moved them somewhere safe. This code assumes that if the DImode
15463 scratch in operands[2] overlaps either the input value or output address
15464 in some way, then that value must die in this insn (we absolutely need
15465 two scratch registers for some corner cases). */
15467 arm_reload_out_hi (rtx
*operands
)
15469 rtx ref
= operands
[0];
15470 rtx outval
= operands
[1];
15472 HOST_WIDE_INT offset
= 0;
15474 if (GET_CODE (ref
) == SUBREG
)
15476 offset
= SUBREG_BYTE (ref
);
15477 ref
= SUBREG_REG (ref
);
15482 /* We have a pseudo which has been spilt onto the stack; there
15483 are two cases here: the first where there is a simple
15484 stack-slot replacement and a second where the stack-slot is
15485 out of range, or is used as a subreg. */
15486 if (reg_equiv_mem (REGNO (ref
)))
15488 ref
= reg_equiv_mem (REGNO (ref
));
15489 base
= find_replacement (&XEXP (ref
, 0));
15492 /* The slot is out of range, or was dressed up in a SUBREG. */
15493 base
= reg_equiv_address (REGNO (ref
));
15496 base
= find_replacement (&XEXP (ref
, 0));
15498 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15500 /* Handle the case where the address is too complex to be offset by 1. */
15501 if (GET_CODE (base
) == MINUS
15502 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15504 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15506 /* Be careful not to destroy OUTVAL. */
15507 if (reg_overlap_mentioned_p (base_plus
, outval
))
15509 /* Updating base_plus might destroy outval, see if we can
15510 swap the scratch and base_plus. */
15511 if (!reg_overlap_mentioned_p (scratch
, outval
))
15512 std::swap (scratch
, base_plus
);
15515 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15517 /* Be conservative and copy OUTVAL into the scratch now,
15518 this should only be necessary if outval is a subreg
15519 of something larger than a word. */
15520 /* XXX Might this clobber base? I can't see how it can,
15521 since scratch is known to overlap with OUTVAL, and
15522 must be wider than a word. */
15523 emit_insn (gen_movhi (scratch_hi
, outval
));
15524 outval
= scratch_hi
;
15528 emit_set_insn (base_plus
, base
);
15531 else if (GET_CODE (base
) == PLUS
)
15533 /* The addend must be CONST_INT, or we would have dealt with it above. */
15534 HOST_WIDE_INT hi
, lo
;
15536 offset
+= INTVAL (XEXP (base
, 1));
15537 base
= XEXP (base
, 0);
15539 /* Rework the address into a legal sequence of insns. */
15540 /* Valid range for lo is -4095 -> 4095 */
15543 : -((-offset
) & 0xfff));
15545 /* Corner case, if lo is the max offset then we would be out of range
15546 once we have added the additional 1 below, so bump the msb into the
15547 pre-loading insn(s). */
15551 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15552 ^ (HOST_WIDE_INT
) 0x80000000)
15553 - (HOST_WIDE_INT
) 0x80000000);
15555 gcc_assert (hi
+ lo
== offset
);
15559 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15561 /* Be careful not to destroy OUTVAL. */
15562 if (reg_overlap_mentioned_p (base_plus
, outval
))
15564 /* Updating base_plus might destroy outval, see if we
15565 can swap the scratch and base_plus. */
15566 if (!reg_overlap_mentioned_p (scratch
, outval
))
15567 std::swap (scratch
, base_plus
);
15570 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15572 /* Be conservative and copy outval into scratch now,
15573 this should only be necessary if outval is a
15574 subreg of something larger than a word. */
15575 /* XXX Might this clobber base? I can't see how it
15576 can, since scratch is known to overlap with
15578 emit_insn (gen_movhi (scratch_hi
, outval
));
15579 outval
= scratch_hi
;
15583 /* Get the base address; addsi3 knows how to handle constants
15584 that require more than one insn. */
15585 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15591 if (BYTES_BIG_ENDIAN
)
15593 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15594 plus_constant (Pmode
, base
,
15596 gen_lowpart (QImode
, outval
)));
15597 emit_insn (gen_lshrsi3 (scratch
,
15598 gen_rtx_SUBREG (SImode
, outval
, 0),
15600 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15602 gen_lowpart (QImode
, scratch
)));
15606 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15608 gen_lowpart (QImode
, outval
)));
15609 emit_insn (gen_lshrsi3 (scratch
,
15610 gen_rtx_SUBREG (SImode
, outval
, 0),
15612 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15613 plus_constant (Pmode
, base
,
15615 gen_lowpart (QImode
, scratch
)));
15619 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15620 (padded to the size of a word) should be passed in a register. */
15623 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15625 if (TARGET_AAPCS_BASED
)
15626 return must_pass_in_stack_var_size (mode
, type
);
15628 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15632 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15633 Return true if an argument passed on the stack should be padded upwards,
15634 i.e. if the least-significant byte has useful data.
15635 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15636 aggregate types are placed in the lowest memory address. */
15639 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15641 if (!TARGET_AAPCS_BASED
)
15642 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15644 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15651 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15652 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15653 register has useful data, and return the opposite if the most
15654 significant byte does. */
15657 arm_pad_reg_upward (machine_mode mode
,
15658 tree type
, int first ATTRIBUTE_UNUSED
)
15660 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15662 /* For AAPCS, small aggregates, small fixed-point types,
15663 and small complex types are always padded upwards. */
15666 if ((AGGREGATE_TYPE_P (type
)
15667 || TREE_CODE (type
) == COMPLEX_TYPE
15668 || FIXED_POINT_TYPE_P (type
))
15669 && int_size_in_bytes (type
) <= 4)
15674 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15675 && GET_MODE_SIZE (mode
) <= 4)
15680 /* Otherwise, use default padding. */
15681 return !BYTES_BIG_ENDIAN
;
15684 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15685 assuming that the address in the base register is word aligned. */
15687 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15689 HOST_WIDE_INT max_offset
;
15691 /* Offset must be a multiple of 4 in Thumb mode. */
15692 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15697 else if (TARGET_ARM
)
15702 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15705 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15706 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15707 Assumes that the address in the base register RN is word aligned. Pattern
15708 guarantees that both memory accesses use the same base register,
15709 the offsets are constants within the range, and the gap between the offsets is 4.
15710 If preload complete then check that registers are legal. WBACK indicates whether
15711 address is updated. LOAD indicates whether memory access is load or store. */
15713 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15714 bool wback
, bool load
)
15716 unsigned int t
, t2
, n
;
15718 if (!reload_completed
)
15721 if (!offset_ok_for_ldrd_strd (offset
))
15728 if ((TARGET_THUMB2
)
15729 && ((wback
&& (n
== t
|| n
== t2
))
15730 || (t
== SP_REGNUM
)
15731 || (t
== PC_REGNUM
)
15732 || (t2
== SP_REGNUM
)
15733 || (t2
== PC_REGNUM
)
15734 || (!load
&& (n
== PC_REGNUM
))
15735 || (load
&& (t
== t2
))
15736 /* Triggers Cortex-M3 LDRD errata. */
15737 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15741 && ((wback
&& (n
== t
|| n
== t2
))
15742 || (t2
== PC_REGNUM
)
15743 || (t
% 2 != 0) /* First destination register is not even. */
15745 /* PC can be used as base register (for offset addressing only),
15746 but it is depricated. */
15747 || (n
== PC_REGNUM
)))
15753 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15754 operand MEM's address contains an immediate offset from the base
15755 register and has no side effects, in which case it sets BASE and
15756 OFFSET accordingly. */
15758 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15762 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15764 /* TODO: Handle more general memory operand patterns, such as
15765 PRE_DEC and PRE_INC. */
15767 if (side_effects_p (mem
))
15770 /* Can't deal with subregs. */
15771 if (GET_CODE (mem
) == SUBREG
)
15774 gcc_assert (MEM_P (mem
));
15776 *offset
= const0_rtx
;
15778 addr
= XEXP (mem
, 0);
15780 /* If addr isn't valid for DImode, then we can't handle it. */
15781 if (!arm_legitimate_address_p (DImode
, addr
,
15782 reload_in_progress
|| reload_completed
))
15790 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15792 *base
= XEXP (addr
, 0);
15793 *offset
= XEXP (addr
, 1);
15794 return (REG_P (*base
) && CONST_INT_P (*offset
));
15800 /* Called from a peephole2 to replace two word-size accesses with a
15801 single LDRD/STRD instruction. Returns true iff we can generate a
15802 new instruction sequence. That is, both accesses use the same base
15803 register and the gap between constant offsets is 4. This function
15804 may reorder its operands to match ldrd/strd RTL templates.
15805 OPERANDS are the operands found by the peephole matcher;
15806 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15807 corresponding memory operands. LOAD indicaates whether the access
15808 is load or store. CONST_STORE indicates a store of constant
15809 integer values held in OPERANDS[4,5] and assumes that the pattern
15810 is of length 4 insn, for the purpose of checking dead registers.
15811 COMMUTE indicates that register operands may be reordered. */
15813 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15814 bool const_store
, bool commute
)
15817 HOST_WIDE_INT offsets
[2], offset
;
15818 rtx base
= NULL_RTX
;
15819 rtx cur_base
, cur_offset
, tmp
;
15821 HARD_REG_SET regset
;
15823 gcc_assert (!const_store
|| !load
);
15824 /* Check that the memory references are immediate offsets from the
15825 same base register. Extract the base register, the destination
15826 registers, and the corresponding memory offsets. */
15827 for (i
= 0; i
< nops
; i
++)
15829 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15834 else if (REGNO (base
) != REGNO (cur_base
))
15837 offsets
[i
] = INTVAL (cur_offset
);
15838 if (GET_CODE (operands
[i
]) == SUBREG
)
15840 tmp
= SUBREG_REG (operands
[i
]);
15841 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15846 /* Make sure there is no dependency between the individual loads. */
15847 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15848 return false; /* RAW */
15850 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15851 return false; /* WAW */
15853 /* If the same input register is used in both stores
15854 when storing different constants, try to find a free register.
15855 For example, the code
15860 can be transformed into
15863 in Thumb mode assuming that r1 is free. */
15865 && REGNO (operands
[0]) == REGNO (operands
[1])
15866 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15870 CLEAR_HARD_REG_SET (regset
);
15871 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15872 if (tmp
== NULL_RTX
)
15875 /* Use the new register in the first load to ensure that
15876 if the original input register is not dead after peephole,
15877 then it will have the correct constant value. */
15880 else if (TARGET_ARM
)
15883 int regno
= REGNO (operands
[0]);
15884 if (!peep2_reg_dead_p (4, operands
[0]))
15886 /* When the input register is even and is not dead after the
15887 pattern, it has to hold the second constant but we cannot
15888 form a legal STRD in ARM mode with this register as the second
15890 if (regno
% 2 == 0)
15893 /* Is regno-1 free? */
15894 SET_HARD_REG_SET (regset
);
15895 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15896 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15897 if (tmp
== NULL_RTX
)
15904 /* Find a DImode register. */
15905 CLEAR_HARD_REG_SET (regset
);
15906 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15907 if (tmp
!= NULL_RTX
)
15909 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15910 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15914 /* Can we use the input register to form a DI register? */
15915 SET_HARD_REG_SET (regset
);
15916 CLEAR_HARD_REG_BIT(regset
,
15917 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15918 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15919 if (tmp
== NULL_RTX
)
15921 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15925 gcc_assert (operands
[0] != NULL_RTX
);
15926 gcc_assert (operands
[1] != NULL_RTX
);
15927 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15928 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15932 /* Make sure the instructions are ordered with lower memory access first. */
15933 if (offsets
[0] > offsets
[1])
15935 gap
= offsets
[0] - offsets
[1];
15936 offset
= offsets
[1];
15938 /* Swap the instructions such that lower memory is accessed first. */
15939 std::swap (operands
[0], operands
[1]);
15940 std::swap (operands
[2], operands
[3]);
15942 std::swap (operands
[4], operands
[5]);
15946 gap
= offsets
[1] - offsets
[0];
15947 offset
= offsets
[0];
15950 /* Make sure accesses are to consecutive memory locations. */
15954 /* Make sure we generate legal instructions. */
15955 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15959 /* In Thumb state, where registers are almost unconstrained, there
15960 is little hope to fix it. */
15964 if (load
&& commute
)
15966 /* Try reordering registers. */
15967 std::swap (operands
[0], operands
[1]);
15968 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15975 /* If input registers are dead after this pattern, they can be
15976 reordered or replaced by other registers that are free in the
15977 current pattern. */
15978 if (!peep2_reg_dead_p (4, operands
[0])
15979 || !peep2_reg_dead_p (4, operands
[1]))
15982 /* Try to reorder the input registers. */
15983 /* For example, the code
15988 can be transformed into
15993 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15996 std::swap (operands
[0], operands
[1]);
16000 /* Try to find a free DI register. */
16001 CLEAR_HARD_REG_SET (regset
);
16002 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
16003 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
16006 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
16007 if (tmp
== NULL_RTX
)
16010 /* DREG must be an even-numbered register in DImode.
16011 Split it into SI registers. */
16012 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
16013 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
16014 gcc_assert (operands
[0] != NULL_RTX
);
16015 gcc_assert (operands
[1] != NULL_RTX
);
16016 gcc_assert (REGNO (operands
[0]) % 2 == 0);
16017 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
16019 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
16031 /* Print a symbolic form of X to the debug file, F. */
16033 arm_print_value (FILE *f
, rtx x
)
16035 switch (GET_CODE (x
))
16038 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
16042 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
16050 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
16052 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
16053 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
16061 fprintf (f
, "\"%s\"", XSTR (x
, 0));
16065 fprintf (f
, "`%s'", XSTR (x
, 0));
16069 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
16073 arm_print_value (f
, XEXP (x
, 0));
16077 arm_print_value (f
, XEXP (x
, 0));
16079 arm_print_value (f
, XEXP (x
, 1));
16087 fprintf (f
, "????");
16092 /* Routines for manipulation of the constant pool. */
16094 /* Arm instructions cannot load a large constant directly into a
16095 register; they have to come from a pc relative load. The constant
16096 must therefore be placed in the addressable range of the pc
16097 relative load. Depending on the precise pc relative load
16098 instruction the range is somewhere between 256 bytes and 4k. This
16099 means that we often have to dump a constant inside a function, and
16100 generate code to branch around it.
16102 It is important to minimize this, since the branches will slow
16103 things down and make the code larger.
16105 Normally we can hide the table after an existing unconditional
16106 branch so that there is no interruption of the flow, but in the
16107 worst case the code looks like this:
16125 We fix this by performing a scan after scheduling, which notices
16126 which instructions need to have their operands fetched from the
16127 constant table and builds the table.
16129 The algorithm starts by building a table of all the constants that
16130 need fixing up and all the natural barriers in the function (places
16131 where a constant table can be dropped without breaking the flow).
16132 For each fixup we note how far the pc-relative replacement will be
16133 able to reach and the offset of the instruction into the function.
16135 Having built the table we then group the fixes together to form
16136 tables that are as large as possible (subject to addressing
16137 constraints) and emit each table of constants after the last
16138 barrier that is within range of all the instructions in the group.
16139 If a group does not contain a barrier, then we forcibly create one
16140 by inserting a jump instruction into the flow. Once the table has
16141 been inserted, the insns are then modified to reference the
16142 relevant entry in the pool.
16144 Possible enhancements to the algorithm (not implemented) are:
16146 1) For some processors and object formats, there may be benefit in
16147 aligning the pools to the start of cache lines; this alignment
16148 would need to be taken into account when calculating addressability
16151 /* These typedefs are located at the start of this file, so that
16152 they can be used in the prototypes there. This comment is to
16153 remind readers of that fact so that the following structures
16154 can be understood more easily.
16156 typedef struct minipool_node Mnode;
16157 typedef struct minipool_fixup Mfix; */
16159 struct minipool_node
16161 /* Doubly linked chain of entries. */
16164 /* The maximum offset into the code that this entry can be placed. While
16165 pushing fixes for forward references, all entries are sorted in order
16166 of increasing max_address. */
16167 HOST_WIDE_INT max_address
;
16168 /* Similarly for an entry inserted for a backwards ref. */
16169 HOST_WIDE_INT min_address
;
16170 /* The number of fixes referencing this entry. This can become zero
16171 if we "unpush" an entry. In this case we ignore the entry when we
16172 come to emit the code. */
16174 /* The offset from the start of the minipool. */
16175 HOST_WIDE_INT offset
;
16176 /* The value in table. */
16178 /* The mode of value. */
16180 /* The size of the value. With iWMMXt enabled
16181 sizes > 4 also imply an alignment of 8-bytes. */
16185 struct minipool_fixup
16189 HOST_WIDE_INT address
;
16195 HOST_WIDE_INT forwards
;
16196 HOST_WIDE_INT backwards
;
16199 /* Fixes less than a word need padding out to a word boundary. */
16200 #define MINIPOOL_FIX_SIZE(mode) \
16201 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16203 static Mnode
* minipool_vector_head
;
16204 static Mnode
* minipool_vector_tail
;
16205 static rtx_code_label
*minipool_vector_label
;
16206 static int minipool_pad
;
16208 /* The linked list of all minipool fixes required for this function. */
16209 Mfix
* minipool_fix_head
;
16210 Mfix
* minipool_fix_tail
;
16211 /* The fix entry for the current minipool, once it has been placed. */
16212 Mfix
* minipool_barrier
;
16214 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16215 #define JUMP_TABLES_IN_TEXT_SECTION 0
16218 static HOST_WIDE_INT
16219 get_jump_table_size (rtx_jump_table_data
*insn
)
16221 /* ADDR_VECs only take room if read-only data does into the text
16223 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
16225 rtx body
= PATTERN (insn
);
16226 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
16227 HOST_WIDE_INT size
;
16228 HOST_WIDE_INT modesize
;
16230 modesize
= GET_MODE_SIZE (GET_MODE (body
));
16231 size
= modesize
* XVECLEN (body
, elt
);
16235 /* Round up size of TBB table to a halfword boundary. */
16236 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
16239 /* No padding necessary for TBH. */
16242 /* Add two bytes for alignment on Thumb. */
16247 gcc_unreachable ();
16255 /* Return the maximum amount of padding that will be inserted before
16258 static HOST_WIDE_INT
16259 get_label_padding (rtx label
)
16261 HOST_WIDE_INT align
, min_insn_size
;
16263 align
= 1 << label_to_alignment (label
);
16264 min_insn_size
= TARGET_THUMB
? 2 : 4;
16265 return align
> min_insn_size
? align
- min_insn_size
: 0;
16268 /* Move a minipool fix MP from its current location to before MAX_MP.
16269 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16270 constraints may need updating. */
16272 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
16273 HOST_WIDE_INT max_address
)
16275 /* The code below assumes these are different. */
16276 gcc_assert (mp
!= max_mp
);
16278 if (max_mp
== NULL
)
16280 if (max_address
< mp
->max_address
)
16281 mp
->max_address
= max_address
;
16285 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16286 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16288 mp
->max_address
= max_address
;
16290 /* Unlink MP from its current position. Since max_mp is non-null,
16291 mp->prev must be non-null. */
16292 mp
->prev
->next
= mp
->next
;
16293 if (mp
->next
!= NULL
)
16294 mp
->next
->prev
= mp
->prev
;
16296 minipool_vector_tail
= mp
->prev
;
16298 /* Re-insert it before MAX_MP. */
16300 mp
->prev
= max_mp
->prev
;
16303 if (mp
->prev
!= NULL
)
16304 mp
->prev
->next
= mp
;
16306 minipool_vector_head
= mp
;
16309 /* Save the new entry. */
16312 /* Scan over the preceding entries and adjust their addresses as
16314 while (mp
->prev
!= NULL
16315 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16317 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16324 /* Add a constant to the minipool for a forward reference. Returns the
16325 node added or NULL if the constant will not fit in this pool. */
16327 add_minipool_forward_ref (Mfix
*fix
)
16329 /* If set, max_mp is the first pool_entry that has a lower
16330 constraint than the one we are trying to add. */
16331 Mnode
* max_mp
= NULL
;
16332 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
16335 /* If the minipool starts before the end of FIX->INSN then this FIX
16336 can not be placed into the current pool. Furthermore, adding the
16337 new constant pool entry may cause the pool to start FIX_SIZE bytes
16339 if (minipool_vector_head
&&
16340 (fix
->address
+ get_attr_length (fix
->insn
)
16341 >= minipool_vector_head
->max_address
- fix
->fix_size
))
16344 /* Scan the pool to see if a constant with the same value has
16345 already been added. While we are doing this, also note the
16346 location where we must insert the constant if it doesn't already
16348 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16350 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16351 && fix
->mode
== mp
->mode
16352 && (!LABEL_P (fix
->value
)
16353 || (CODE_LABEL_NUMBER (fix
->value
)
16354 == CODE_LABEL_NUMBER (mp
->value
)))
16355 && rtx_equal_p (fix
->value
, mp
->value
))
16357 /* More than one fix references this entry. */
16359 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
16362 /* Note the insertion point if necessary. */
16364 && mp
->max_address
> max_address
)
16367 /* If we are inserting an 8-bytes aligned quantity and
16368 we have not already found an insertion point, then
16369 make sure that all such 8-byte aligned quantities are
16370 placed at the start of the pool. */
16371 if (ARM_DOUBLEWORD_ALIGN
16373 && fix
->fix_size
>= 8
16374 && mp
->fix_size
< 8)
16377 max_address
= mp
->max_address
;
16381 /* The value is not currently in the minipool, so we need to create
16382 a new entry for it. If MAX_MP is NULL, the entry will be put on
16383 the end of the list since the placement is less constrained than
16384 any existing entry. Otherwise, we insert the new fix before
16385 MAX_MP and, if necessary, adjust the constraints on the other
16388 mp
->fix_size
= fix
->fix_size
;
16389 mp
->mode
= fix
->mode
;
16390 mp
->value
= fix
->value
;
16392 /* Not yet required for a backwards ref. */
16393 mp
->min_address
= -65536;
16395 if (max_mp
== NULL
)
16397 mp
->max_address
= max_address
;
16399 mp
->prev
= minipool_vector_tail
;
16401 if (mp
->prev
== NULL
)
16403 minipool_vector_head
= mp
;
16404 minipool_vector_label
= gen_label_rtx ();
16407 mp
->prev
->next
= mp
;
16409 minipool_vector_tail
= mp
;
16413 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
16414 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
16416 mp
->max_address
= max_address
;
16419 mp
->prev
= max_mp
->prev
;
16421 if (mp
->prev
!= NULL
)
16422 mp
->prev
->next
= mp
;
16424 minipool_vector_head
= mp
;
16427 /* Save the new entry. */
16430 /* Scan over the preceding entries and adjust their addresses as
16432 while (mp
->prev
!= NULL
16433 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16435 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16443 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16444 HOST_WIDE_INT min_address
)
16446 HOST_WIDE_INT offset
;
16448 /* The code below assumes these are different. */
16449 gcc_assert (mp
!= min_mp
);
16451 if (min_mp
== NULL
)
16453 if (min_address
> mp
->min_address
)
16454 mp
->min_address
= min_address
;
16458 /* We will adjust this below if it is too loose. */
16459 mp
->min_address
= min_address
;
16461 /* Unlink MP from its current position. Since min_mp is non-null,
16462 mp->next must be non-null. */
16463 mp
->next
->prev
= mp
->prev
;
16464 if (mp
->prev
!= NULL
)
16465 mp
->prev
->next
= mp
->next
;
16467 minipool_vector_head
= mp
->next
;
16469 /* Reinsert it after MIN_MP. */
16471 mp
->next
= min_mp
->next
;
16473 if (mp
->next
!= NULL
)
16474 mp
->next
->prev
= mp
;
16476 minipool_vector_tail
= mp
;
16482 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16484 mp
->offset
= offset
;
16485 if (mp
->refcount
> 0)
16486 offset
+= mp
->fix_size
;
16488 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16489 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16495 /* Add a constant to the minipool for a backward reference. Returns the
16496 node added or NULL if the constant will not fit in this pool.
16498 Note that the code for insertion for a backwards reference can be
16499 somewhat confusing because the calculated offsets for each fix do
16500 not take into account the size of the pool (which is still under
16503 add_minipool_backward_ref (Mfix
*fix
)
16505 /* If set, min_mp is the last pool_entry that has a lower constraint
16506 than the one we are trying to add. */
16507 Mnode
*min_mp
= NULL
;
16508 /* This can be negative, since it is only a constraint. */
16509 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16512 /* If we can't reach the current pool from this insn, or if we can't
16513 insert this entry at the end of the pool without pushing other
16514 fixes out of range, then we don't try. This ensures that we
16515 can't fail later on. */
16516 if (min_address
>= minipool_barrier
->address
16517 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16518 >= minipool_barrier
->address
))
16521 /* Scan the pool to see if a constant with the same value has
16522 already been added. While we are doing this, also note the
16523 location where we must insert the constant if it doesn't already
16525 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16527 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16528 && fix
->mode
== mp
->mode
16529 && (!LABEL_P (fix
->value
)
16530 || (CODE_LABEL_NUMBER (fix
->value
)
16531 == CODE_LABEL_NUMBER (mp
->value
)))
16532 && rtx_equal_p (fix
->value
, mp
->value
)
16533 /* Check that there is enough slack to move this entry to the
16534 end of the table (this is conservative). */
16535 && (mp
->max_address
16536 > (minipool_barrier
->address
16537 + minipool_vector_tail
->offset
16538 + minipool_vector_tail
->fix_size
)))
16541 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16544 if (min_mp
!= NULL
)
16545 mp
->min_address
+= fix
->fix_size
;
16548 /* Note the insertion point if necessary. */
16549 if (mp
->min_address
< min_address
)
16551 /* For now, we do not allow the insertion of 8-byte alignment
16552 requiring nodes anywhere but at the start of the pool. */
16553 if (ARM_DOUBLEWORD_ALIGN
16554 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16559 else if (mp
->max_address
16560 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16562 /* Inserting before this entry would push the fix beyond
16563 its maximum address (which can happen if we have
16564 re-located a forwards fix); force the new fix to come
16566 if (ARM_DOUBLEWORD_ALIGN
16567 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16572 min_address
= mp
->min_address
+ fix
->fix_size
;
16575 /* Do not insert a non-8-byte aligned quantity before 8-byte
16576 aligned quantities. */
16577 else if (ARM_DOUBLEWORD_ALIGN
16578 && fix
->fix_size
< 8
16579 && mp
->fix_size
>= 8)
16582 min_address
= mp
->min_address
+ fix
->fix_size
;
16587 /* We need to create a new entry. */
16589 mp
->fix_size
= fix
->fix_size
;
16590 mp
->mode
= fix
->mode
;
16591 mp
->value
= fix
->value
;
16593 mp
->max_address
= minipool_barrier
->address
+ 65536;
16595 mp
->min_address
= min_address
;
16597 if (min_mp
== NULL
)
16600 mp
->next
= minipool_vector_head
;
16602 if (mp
->next
== NULL
)
16604 minipool_vector_tail
= mp
;
16605 minipool_vector_label
= gen_label_rtx ();
16608 mp
->next
->prev
= mp
;
16610 minipool_vector_head
= mp
;
16614 mp
->next
= min_mp
->next
;
16618 if (mp
->next
!= NULL
)
16619 mp
->next
->prev
= mp
;
16621 minipool_vector_tail
= mp
;
16624 /* Save the new entry. */
16632 /* Scan over the following entries and adjust their offsets. */
16633 while (mp
->next
!= NULL
)
16635 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16636 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16639 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16641 mp
->next
->offset
= mp
->offset
;
16650 assign_minipool_offsets (Mfix
*barrier
)
16652 HOST_WIDE_INT offset
= 0;
16655 minipool_barrier
= barrier
;
16657 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16659 mp
->offset
= offset
;
16661 if (mp
->refcount
> 0)
16662 offset
+= mp
->fix_size
;
16666 /* Output the literal table */
16668 dump_minipool (rtx_insn
*scan
)
16674 if (ARM_DOUBLEWORD_ALIGN
)
16675 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16676 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16683 fprintf (dump_file
,
16684 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16685 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16687 scan
= emit_label_after (gen_label_rtx (), scan
);
16688 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16689 scan
= emit_label_after (minipool_vector_label
, scan
);
16691 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16693 if (mp
->refcount
> 0)
16697 fprintf (dump_file
,
16698 ";; Offset %u, min %ld, max %ld ",
16699 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16700 (unsigned long) mp
->max_address
);
16701 arm_print_value (dump_file
, mp
->value
);
16702 fputc ('\n', dump_file
);
16705 switch (GET_MODE_SIZE (mp
->mode
))
16707 #ifdef HAVE_consttable_1
16709 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
16713 #ifdef HAVE_consttable_2
16715 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
16719 #ifdef HAVE_consttable_4
16721 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
16725 #ifdef HAVE_consttable_8
16727 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
16731 #ifdef HAVE_consttable_16
16733 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
16738 gcc_unreachable ();
16746 minipool_vector_head
= minipool_vector_tail
= NULL
;
16747 scan
= emit_insn_after (gen_consttable_end (), scan
);
16748 scan
= emit_barrier_after (scan
);
16751 /* Return the cost of forcibly inserting a barrier after INSN. */
16753 arm_barrier_cost (rtx_insn
*insn
)
16755 /* Basing the location of the pool on the loop depth is preferable,
16756 but at the moment, the basic block information seems to be
16757 corrupt by this stage of the compilation. */
16758 int base_cost
= 50;
16759 rtx_insn
*next
= next_nonnote_insn (insn
);
16761 if (next
!= NULL
&& LABEL_P (next
))
16764 switch (GET_CODE (insn
))
16767 /* It will always be better to place the table before the label, rather
16776 return base_cost
- 10;
16779 return base_cost
+ 10;
16783 /* Find the best place in the insn stream in the range
16784 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16785 Create the barrier by inserting a jump and add a new fix entry for
16788 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16790 HOST_WIDE_INT count
= 0;
16791 rtx_barrier
*barrier
;
16792 rtx_insn
*from
= fix
->insn
;
16793 /* The instruction after which we will insert the jump. */
16794 rtx_insn
*selected
= NULL
;
16796 /* The address at which the jump instruction will be placed. */
16797 HOST_WIDE_INT selected_address
;
16799 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16800 rtx_code_label
*label
= gen_label_rtx ();
16802 selected_cost
= arm_barrier_cost (from
);
16803 selected_address
= fix
->address
;
16805 while (from
&& count
< max_count
)
16807 rtx_jump_table_data
*tmp
;
16810 /* This code shouldn't have been called if there was a natural barrier
16812 gcc_assert (!BARRIER_P (from
));
16814 /* Count the length of this insn. This must stay in sync with the
16815 code that pushes minipool fixes. */
16816 if (LABEL_P (from
))
16817 count
+= get_label_padding (from
);
16819 count
+= get_attr_length (from
);
16821 /* If there is a jump table, add its length. */
16822 if (tablejump_p (from
, NULL
, &tmp
))
16824 count
+= get_jump_table_size (tmp
);
16826 /* Jump tables aren't in a basic block, so base the cost on
16827 the dispatch insn. If we select this location, we will
16828 still put the pool after the table. */
16829 new_cost
= arm_barrier_cost (from
);
16831 if (count
< max_count
16832 && (!selected
|| new_cost
<= selected_cost
))
16835 selected_cost
= new_cost
;
16836 selected_address
= fix
->address
+ count
;
16839 /* Continue after the dispatch table. */
16840 from
= NEXT_INSN (tmp
);
16844 new_cost
= arm_barrier_cost (from
);
16846 if (count
< max_count
16847 && (!selected
|| new_cost
<= selected_cost
))
16850 selected_cost
= new_cost
;
16851 selected_address
= fix
->address
+ count
;
16854 from
= NEXT_INSN (from
);
16857 /* Make sure that we found a place to insert the jump. */
16858 gcc_assert (selected
);
16860 /* Make sure we do not split a call and its corresponding
16861 CALL_ARG_LOCATION note. */
16862 if (CALL_P (selected
))
16864 rtx_insn
*next
= NEXT_INSN (selected
);
16865 if (next
&& NOTE_P (next
)
16866 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16870 /* Create a new JUMP_INSN that branches around a barrier. */
16871 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16872 JUMP_LABEL (from
) = label
;
16873 barrier
= emit_barrier_after (from
);
16874 emit_label_after (label
, barrier
);
16876 /* Create a minipool barrier entry for the new barrier. */
16877 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16878 new_fix
->insn
= barrier
;
16879 new_fix
->address
= selected_address
;
16880 new_fix
->next
= fix
->next
;
16881 fix
->next
= new_fix
;
16886 /* Record that there is a natural barrier in the insn stream at
16889 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16891 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16894 fix
->address
= address
;
16897 if (minipool_fix_head
!= NULL
)
16898 minipool_fix_tail
->next
= fix
;
16900 minipool_fix_head
= fix
;
16902 minipool_fix_tail
= fix
;
16905 /* Record INSN, which will need fixing up to load a value from the
16906 minipool. ADDRESS is the offset of the insn since the start of the
16907 function; LOC is a pointer to the part of the insn which requires
16908 fixing; VALUE is the constant that must be loaded, which is of type
16911 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16912 machine_mode mode
, rtx value
)
16914 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16917 fix
->address
= address
;
16920 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16921 fix
->value
= value
;
16922 fix
->forwards
= get_attr_pool_range (insn
);
16923 fix
->backwards
= get_attr_neg_pool_range (insn
);
16924 fix
->minipool
= NULL
;
16926 /* If an insn doesn't have a range defined for it, then it isn't
16927 expecting to be reworked by this code. Better to stop now than
16928 to generate duff assembly code. */
16929 gcc_assert (fix
->forwards
|| fix
->backwards
);
16931 /* If an entry requires 8-byte alignment then assume all constant pools
16932 require 4 bytes of padding. Trying to do this later on a per-pool
16933 basis is awkward because existing pool entries have to be modified. */
16934 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16939 fprintf (dump_file
,
16940 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16941 GET_MODE_NAME (mode
),
16942 INSN_UID (insn
), (unsigned long) address
,
16943 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16944 arm_print_value (dump_file
, fix
->value
);
16945 fprintf (dump_file
, "\n");
16948 /* Add it to the chain of fixes. */
16951 if (minipool_fix_head
!= NULL
)
16952 minipool_fix_tail
->next
= fix
;
16954 minipool_fix_head
= fix
;
16956 minipool_fix_tail
= fix
;
16959 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16960 Returns the number of insns needed, or 99 if we always want to synthesize
16963 arm_max_const_double_inline_cost ()
16965 /* Let the value get synthesized to avoid the use of literal pools. */
16966 if (arm_disable_literal_pool
)
16969 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16972 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16973 Returns the number of insns needed, or 99 if we don't know how to
16976 arm_const_double_inline_cost (rtx val
)
16978 rtx lowpart
, highpart
;
16981 mode
= GET_MODE (val
);
16983 if (mode
== VOIDmode
)
16986 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16988 lowpart
= gen_lowpart (SImode
, val
);
16989 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16991 gcc_assert (CONST_INT_P (lowpart
));
16992 gcc_assert (CONST_INT_P (highpart
));
16994 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16995 NULL_RTX
, NULL_RTX
, 0, 0)
16996 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16997 NULL_RTX
, NULL_RTX
, 0, 0));
17000 /* Cost of loading a SImode constant. */
17002 arm_const_inline_cost (enum rtx_code code
, rtx val
)
17004 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
17005 NULL_RTX
, NULL_RTX
, 1, 0);
17008 /* Return true if it is worthwhile to split a 64-bit constant into two
17009 32-bit operations. This is the case if optimizing for size, or
17010 if we have load delay slots, or if one 32-bit part can be done with
17011 a single data operation. */
17013 arm_const_double_by_parts (rtx val
)
17015 machine_mode mode
= GET_MODE (val
);
17018 if (optimize_size
|| arm_ld_sched
)
17021 if (mode
== VOIDmode
)
17024 part
= gen_highpart_mode (SImode
, mode
, val
);
17026 gcc_assert (CONST_INT_P (part
));
17028 if (const_ok_for_arm (INTVAL (part
))
17029 || const_ok_for_arm (~INTVAL (part
)))
17032 part
= gen_lowpart (SImode
, val
);
17034 gcc_assert (CONST_INT_P (part
));
17036 if (const_ok_for_arm (INTVAL (part
))
17037 || const_ok_for_arm (~INTVAL (part
)))
17043 /* Return true if it is possible to inline both the high and low parts
17044 of a 64-bit constant into 32-bit data processing instructions. */
17046 arm_const_double_by_immediates (rtx val
)
17048 machine_mode mode
= GET_MODE (val
);
17051 if (mode
== VOIDmode
)
17054 part
= gen_highpart_mode (SImode
, mode
, val
);
17056 gcc_assert (CONST_INT_P (part
));
17058 if (!const_ok_for_arm (INTVAL (part
)))
17061 part
= gen_lowpart (SImode
, val
);
17063 gcc_assert (CONST_INT_P (part
));
17065 if (!const_ok_for_arm (INTVAL (part
)))
17071 /* Scan INSN and note any of its operands that need fixing.
17072 If DO_PUSHES is false we do not actually push any of the fixups
17075 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
17079 extract_constrain_insn (insn
);
17081 if (recog_data
.n_alternatives
== 0)
17084 /* Fill in recog_op_alt with information about the constraints of
17086 preprocess_constraints (insn
);
17088 const operand_alternative
*op_alt
= which_op_alt ();
17089 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
17091 /* Things we need to fix can only occur in inputs. */
17092 if (recog_data
.operand_type
[opno
] != OP_IN
)
17095 /* If this alternative is a memory reference, then any mention
17096 of constants in this alternative is really to fool reload
17097 into allowing us to accept one there. We need to fix them up
17098 now so that we output the right code. */
17099 if (op_alt
[opno
].memory_ok
)
17101 rtx op
= recog_data
.operand
[opno
];
17103 if (CONSTANT_P (op
))
17106 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
17107 recog_data
.operand_mode
[opno
], op
);
17109 else if (MEM_P (op
)
17110 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
17111 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
17115 rtx cop
= avoid_constant_pool_reference (op
);
17117 /* Casting the address of something to a mode narrower
17118 than a word can cause avoid_constant_pool_reference()
17119 to return the pool reference itself. That's no good to
17120 us here. Lets just hope that we can use the
17121 constant pool value directly. */
17123 cop
= get_pool_constant (XEXP (op
, 0));
17125 push_minipool_fix (insn
, address
,
17126 recog_data
.operand_loc
[opno
],
17127 recog_data
.operand_mode
[opno
], cop
);
17137 /* Rewrite move insn into subtract of 0 if the condition codes will
17138 be useful in next conditional jump insn. */
17141 thumb1_reorg (void)
17145 FOR_EACH_BB_FN (bb
, cfun
)
17148 rtx pat
, op0
, set
= NULL
;
17149 rtx_insn
*prev
, *insn
= BB_END (bb
);
17150 bool insn_clobbered
= false;
17152 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17153 insn
= PREV_INSN (insn
);
17155 /* Find the last cbranchsi4_insn in basic block BB. */
17156 if (insn
== BB_HEAD (bb
)
17157 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17160 /* Get the register with which we are comparing. */
17161 pat
= PATTERN (insn
);
17162 op0
= XEXP (XEXP (SET_SRC (pat
), 0), 0);
17164 /* Find the first flag setting insn before INSN in basic block BB. */
17165 gcc_assert (insn
!= BB_HEAD (bb
));
17166 for (prev
= PREV_INSN (insn
);
17168 && prev
!= BB_HEAD (bb
)
17170 || DEBUG_INSN_P (prev
)
17171 || ((set
= single_set (prev
)) != NULL
17172 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17173 prev
= PREV_INSN (prev
))
17175 if (reg_set_p (op0
, prev
))
17176 insn_clobbered
= true;
17179 /* Skip if op0 is clobbered by insn other than prev. */
17180 if (insn_clobbered
)
17186 dest
= SET_DEST (set
);
17187 src
= SET_SRC (set
);
17188 if (!low_register_operand (dest
, SImode
)
17189 || !low_register_operand (src
, SImode
))
17192 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17193 in INSN. Both src and dest of the move insn are checked. */
17194 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17196 dest
= copy_rtx (dest
);
17197 src
= copy_rtx (src
);
17198 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17199 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17200 INSN_CODE (prev
) = -1;
17201 /* Set test register in INSN to dest. */
17202 XEXP (XEXP (SET_SRC (pat
), 0), 0) = copy_rtx (dest
);
17203 INSN_CODE (insn
) = -1;
17208 /* Convert instructions to their cc-clobbering variant if possible, since
17209 that allows us to use smaller encodings. */
17212 thumb2_reorg (void)
17217 INIT_REG_SET (&live
);
17219 /* We are freeing block_for_insn in the toplev to keep compatibility
17220 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17221 compute_bb_for_insn ();
17224 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17226 FOR_EACH_BB_FN (bb
, cfun
)
17228 if ((current_tune
->disparage_flag_setting_t16_encodings
17229 == tune_params::DISPARAGE_FLAGS_ALL
)
17230 && optimize_bb_for_speed_p (bb
))
17234 Convert_Action action
= SKIP
;
17235 Convert_Action action_for_partial_flag_setting
17236 = ((current_tune
->disparage_flag_setting_t16_encodings
17237 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17238 && optimize_bb_for_speed_p (bb
))
17241 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17242 df_simulate_initialize_backwards (bb
, &live
);
17243 FOR_BB_INSNS_REVERSE (bb
, insn
)
17245 if (NONJUMP_INSN_P (insn
)
17246 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17247 && GET_CODE (PATTERN (insn
)) == SET
)
17250 rtx pat
= PATTERN (insn
);
17251 rtx dst
= XEXP (pat
, 0);
17252 rtx src
= XEXP (pat
, 1);
17253 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17255 if (UNARY_P (src
) || BINARY_P (src
))
17256 op0
= XEXP (src
, 0);
17258 if (BINARY_P (src
))
17259 op1
= XEXP (src
, 1);
17261 if (low_register_operand (dst
, SImode
))
17263 switch (GET_CODE (src
))
17266 /* Adding two registers and storing the result
17267 in the first source is already a 16-bit
17269 if (rtx_equal_p (dst
, op0
)
17270 && register_operand (op1
, SImode
))
17273 if (low_register_operand (op0
, SImode
))
17275 /* ADDS <Rd>,<Rn>,<Rm> */
17276 if (low_register_operand (op1
, SImode
))
17278 /* ADDS <Rdn>,#<imm8> */
17279 /* SUBS <Rdn>,#<imm8> */
17280 else if (rtx_equal_p (dst
, op0
)
17281 && CONST_INT_P (op1
)
17282 && IN_RANGE (INTVAL (op1
), -255, 255))
17284 /* ADDS <Rd>,<Rn>,#<imm3> */
17285 /* SUBS <Rd>,<Rn>,#<imm3> */
17286 else if (CONST_INT_P (op1
)
17287 && IN_RANGE (INTVAL (op1
), -7, 7))
17290 /* ADCS <Rd>, <Rn> */
17291 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17292 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17293 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17295 && COMPARISON_P (op1
)
17296 && cc_register (XEXP (op1
, 0), VOIDmode
)
17297 && maybe_get_arm_condition_code (op1
) == ARM_CS
17298 && XEXP (op1
, 1) == const0_rtx
)
17303 /* RSBS <Rd>,<Rn>,#0
17304 Not handled here: see NEG below. */
17305 /* SUBS <Rd>,<Rn>,#<imm3>
17307 Not handled here: see PLUS above. */
17308 /* SUBS <Rd>,<Rn>,<Rm> */
17309 if (low_register_operand (op0
, SImode
)
17310 && low_register_operand (op1
, SImode
))
17315 /* MULS <Rdm>,<Rn>,<Rdm>
17316 As an exception to the rule, this is only used
17317 when optimizing for size since MULS is slow on all
17318 known implementations. We do not even want to use
17319 MULS in cold code, if optimizing for speed, so we
17320 test the global flag here. */
17321 if (!optimize_size
)
17323 /* else fall through. */
17327 /* ANDS <Rdn>,<Rm> */
17328 if (rtx_equal_p (dst
, op0
)
17329 && low_register_operand (op1
, SImode
))
17330 action
= action_for_partial_flag_setting
;
17331 else if (rtx_equal_p (dst
, op1
)
17332 && low_register_operand (op0
, SImode
))
17333 action
= action_for_partial_flag_setting
== SKIP
17334 ? SKIP
: SWAP_CONV
;
17340 /* ASRS <Rdn>,<Rm> */
17341 /* LSRS <Rdn>,<Rm> */
17342 /* LSLS <Rdn>,<Rm> */
17343 if (rtx_equal_p (dst
, op0
)
17344 && low_register_operand (op1
, SImode
))
17345 action
= action_for_partial_flag_setting
;
17346 /* ASRS <Rd>,<Rm>,#<imm5> */
17347 /* LSRS <Rd>,<Rm>,#<imm5> */
17348 /* LSLS <Rd>,<Rm>,#<imm5> */
17349 else if (low_register_operand (op0
, SImode
)
17350 && CONST_INT_P (op1
)
17351 && IN_RANGE (INTVAL (op1
), 0, 31))
17352 action
= action_for_partial_flag_setting
;
17356 /* RORS <Rdn>,<Rm> */
17357 if (rtx_equal_p (dst
, op0
)
17358 && low_register_operand (op1
, SImode
))
17359 action
= action_for_partial_flag_setting
;
17363 /* MVNS <Rd>,<Rm> */
17364 if (low_register_operand (op0
, SImode
))
17365 action
= action_for_partial_flag_setting
;
17369 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17370 if (low_register_operand (op0
, SImode
))
17375 /* MOVS <Rd>,#<imm8> */
17376 if (CONST_INT_P (src
)
17377 && IN_RANGE (INTVAL (src
), 0, 255))
17378 action
= action_for_partial_flag_setting
;
17382 /* MOVS and MOV<c> with registers have different
17383 encodings, so are not relevant here. */
17391 if (action
!= SKIP
)
17393 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17394 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17397 if (action
== SWAP_CONV
)
17399 src
= copy_rtx (src
);
17400 XEXP (src
, 0) = op1
;
17401 XEXP (src
, 1) = op0
;
17402 pat
= gen_rtx_SET (dst
, src
);
17403 vec
= gen_rtvec (2, pat
, clobber
);
17405 else /* action == CONV */
17406 vec
= gen_rtvec (2, pat
, clobber
);
17408 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17409 INSN_CODE (insn
) = -1;
17413 if (NONDEBUG_INSN_P (insn
))
17414 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17418 CLEAR_REG_SET (&live
);
17421 /* Gcc puts the pool in the wrong place for ARM, since we can only
17422 load addresses a limited distance around the pc. We do some
17423 special munging to move the constant pool values to the correct
17424 point in the code. */
17429 HOST_WIDE_INT address
= 0;
17434 else if (TARGET_THUMB2
)
17437 /* Ensure all insns that must be split have been split at this point.
17438 Otherwise, the pool placement code below may compute incorrect
17439 insn lengths. Note that when optimizing, all insns have already
17440 been split at this point. */
17442 split_all_insns_noflow ();
17444 minipool_fix_head
= minipool_fix_tail
= NULL
;
17446 /* The first insn must always be a note, or the code below won't
17447 scan it properly. */
17448 insn
= get_insns ();
17449 gcc_assert (NOTE_P (insn
));
17452 /* Scan all the insns and record the operands that will need fixing. */
17453 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17455 if (BARRIER_P (insn
))
17456 push_minipool_barrier (insn
, address
);
17457 else if (INSN_P (insn
))
17459 rtx_jump_table_data
*table
;
17461 note_invalid_constants (insn
, address
, true);
17462 address
+= get_attr_length (insn
);
17464 /* If the insn is a vector jump, add the size of the table
17465 and skip the table. */
17466 if (tablejump_p (insn
, NULL
, &table
))
17468 address
+= get_jump_table_size (table
);
17472 else if (LABEL_P (insn
))
17473 /* Add the worst-case padding due to alignment. We don't add
17474 the _current_ padding because the minipool insertions
17475 themselves might change it. */
17476 address
+= get_label_padding (insn
);
17479 fix
= minipool_fix_head
;
17481 /* Now scan the fixups and perform the required changes. */
17486 Mfix
* last_added_fix
;
17487 Mfix
* last_barrier
= NULL
;
17490 /* Skip any further barriers before the next fix. */
17491 while (fix
&& BARRIER_P (fix
->insn
))
17494 /* No more fixes. */
17498 last_added_fix
= NULL
;
17500 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17502 if (BARRIER_P (ftmp
->insn
))
17504 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17507 last_barrier
= ftmp
;
17509 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17512 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17515 /* If we found a barrier, drop back to that; any fixes that we
17516 could have reached but come after the barrier will now go in
17517 the next mini-pool. */
17518 if (last_barrier
!= NULL
)
17520 /* Reduce the refcount for those fixes that won't go into this
17522 for (fdel
= last_barrier
->next
;
17523 fdel
&& fdel
!= ftmp
;
17526 fdel
->minipool
->refcount
--;
17527 fdel
->minipool
= NULL
;
17530 ftmp
= last_barrier
;
17534 /* ftmp is first fix that we can't fit into this pool and
17535 there no natural barriers that we could use. Insert a
17536 new barrier in the code somewhere between the previous
17537 fix and this one, and arrange to jump around it. */
17538 HOST_WIDE_INT max_address
;
17540 /* The last item on the list of fixes must be a barrier, so
17541 we can never run off the end of the list of fixes without
17542 last_barrier being set. */
17545 max_address
= minipool_vector_head
->max_address
;
17546 /* Check that there isn't another fix that is in range that
17547 we couldn't fit into this pool because the pool was
17548 already too large: we need to put the pool before such an
17549 instruction. The pool itself may come just after the
17550 fix because create_fix_barrier also allows space for a
17551 jump instruction. */
17552 if (ftmp
->address
< max_address
)
17553 max_address
= ftmp
->address
+ 1;
17555 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17558 assign_minipool_offsets (last_barrier
);
17562 if (!BARRIER_P (ftmp
->insn
)
17563 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17570 /* Scan over the fixes we have identified for this pool, fixing them
17571 up and adding the constants to the pool itself. */
17572 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17573 this_fix
= this_fix
->next
)
17574 if (!BARRIER_P (this_fix
->insn
))
17577 = plus_constant (Pmode
,
17578 gen_rtx_LABEL_REF (VOIDmode
,
17579 minipool_vector_label
),
17580 this_fix
->minipool
->offset
);
17581 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17584 dump_minipool (last_barrier
->insn
);
17588 /* From now on we must synthesize any constants that we can't handle
17589 directly. This can happen if the RTL gets split during final
17590 instruction generation. */
17591 cfun
->machine
->after_arm_reorg
= 1;
17593 /* Free the minipool memory. */
17594 obstack_free (&minipool_obstack
, minipool_startobj
);
17597 /* Routines to output assembly language. */
17599 /* Return string representation of passed in real value. */
17600 static const char *
17601 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17603 if (!fp_consts_inited
)
17606 gcc_assert (real_equal (r
, &value_fp0
));
17610 /* OPERANDS[0] is the entire list of insns that constitute pop,
17611 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17612 is in the list, UPDATE is true iff the list contains explicit
17613 update of base register. */
17615 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17621 const char *conditional
;
17622 int num_saves
= XVECLEN (operands
[0], 0);
17623 unsigned int regno
;
17624 unsigned int regno_base
= REGNO (operands
[1]);
17627 offset
+= update
? 1 : 0;
17628 offset
+= return_pc
? 1 : 0;
17630 /* Is the base register in the list? */
17631 for (i
= offset
; i
< num_saves
; i
++)
17633 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17634 /* If SP is in the list, then the base register must be SP. */
17635 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17636 /* If base register is in the list, there must be no explicit update. */
17637 if (regno
== regno_base
)
17638 gcc_assert (!update
);
17641 conditional
= reverse
? "%?%D0" : "%?%d0";
17642 if ((regno_base
== SP_REGNUM
) && TARGET_THUMB
)
17644 /* Output pop (not stmfd) because it has a shorter encoding. */
17645 gcc_assert (update
);
17646 sprintf (pattern
, "pop%s\t{", conditional
);
17650 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17651 It's just a convention, their semantics are identical. */
17652 if (regno_base
== SP_REGNUM
)
17653 sprintf (pattern
, "ldm%sfd\t", conditional
);
17654 else if (TARGET_UNIFIED_ASM
)
17655 sprintf (pattern
, "ldmia%s\t", conditional
);
17657 sprintf (pattern
, "ldm%sia\t", conditional
);
17659 strcat (pattern
, reg_names
[regno_base
]);
17661 strcat (pattern
, "!, {");
17663 strcat (pattern
, ", {");
17666 /* Output the first destination register. */
17668 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17670 /* Output the rest of the destination registers. */
17671 for (i
= offset
+ 1; i
< num_saves
; i
++)
17673 strcat (pattern
, ", ");
17675 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17678 strcat (pattern
, "}");
17680 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc
)
17681 strcat (pattern
, "^");
17683 output_asm_insn (pattern
, &cond
);
17687 /* Output the assembly for a store multiple. */
17690 vfp_output_vstmd (rtx
* operands
)
17696 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17697 ? XEXP (operands
[0], 0)
17698 : XEXP (XEXP (operands
[0], 0), 0);
17699 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17702 strcpy (pattern
, "vpush%?.64\t{%P1");
17704 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17706 p
= strlen (pattern
);
17708 gcc_assert (REG_P (operands
[1]));
17710 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17711 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17713 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17715 strcpy (&pattern
[p
], "}");
17717 output_asm_insn (pattern
, operands
);
17722 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17723 number of bytes pushed. */
17726 vfp_emit_fstmd (int base_reg
, int count
)
17733 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17734 register pairs are stored by a store multiple insn. We avoid this
17735 by pushing an extra pair. */
17736 if (count
== 2 && !arm_arch6
)
17738 if (base_reg
== LAST_VFP_REGNUM
- 3)
17743 /* FSTMD may not store more than 16 doubleword registers at once. Split
17744 larger stores into multiple parts (up to a maximum of two, in
17749 /* NOTE: base_reg is an internal register number, so each D register
17751 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17752 saved
+= vfp_emit_fstmd (base_reg
, 16);
17756 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17757 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17759 reg
= gen_rtx_REG (DFmode
, base_reg
);
17762 XVECEXP (par
, 0, 0)
17763 = gen_rtx_SET (gen_frame_mem
17765 gen_rtx_PRE_MODIFY (Pmode
,
17768 (Pmode
, stack_pointer_rtx
,
17771 gen_rtx_UNSPEC (BLKmode
,
17772 gen_rtvec (1, reg
),
17773 UNSPEC_PUSH_MULT
));
17775 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17776 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17777 RTX_FRAME_RELATED_P (tmp
) = 1;
17778 XVECEXP (dwarf
, 0, 0) = tmp
;
17780 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17781 RTX_FRAME_RELATED_P (tmp
) = 1;
17782 XVECEXP (dwarf
, 0, 1) = tmp
;
17784 for (i
= 1; i
< count
; i
++)
17786 reg
= gen_rtx_REG (DFmode
, base_reg
);
17788 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17790 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17791 plus_constant (Pmode
,
17795 RTX_FRAME_RELATED_P (tmp
) = 1;
17796 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17799 par
= emit_insn (par
);
17800 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17801 RTX_FRAME_RELATED_P (par
) = 1;
17806 /* Emit a call instruction with pattern PAT. ADDR is the address of
17807 the call target. */
17810 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17814 insn
= emit_call_insn (pat
);
17816 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17817 If the call might use such an entry, add a use of the PIC register
17818 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17819 if (TARGET_VXWORKS_RTP
17822 && GET_CODE (addr
) == SYMBOL_REF
17823 && (SYMBOL_REF_DECL (addr
)
17824 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17825 : !SYMBOL_REF_LOCAL_P (addr
)))
17827 require_pic_register ();
17828 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17831 if (TARGET_AAPCS_BASED
)
17833 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17834 linker. We need to add an IP clobber to allow setting
17835 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17836 is not needed since it's a fixed register. */
17837 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17838 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17842 /* Output a 'call' insn. */
17844 output_call (rtx
*operands
)
17846 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17848 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17849 if (REGNO (operands
[0]) == LR_REGNUM
)
17851 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17852 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17855 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17857 if (TARGET_INTERWORK
|| arm_arch4t
)
17858 output_asm_insn ("bx%?\t%0", operands
);
17860 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17865 /* Output a 'call' insn that is a reference in memory. This is
17866 disabled for ARMv5 and we prefer a blx instead because otherwise
17867 there's a significant performance overhead. */
17869 output_call_mem (rtx
*operands
)
17871 gcc_assert (!arm_arch5
);
17872 if (TARGET_INTERWORK
)
17874 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17875 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17876 output_asm_insn ("bx%?\t%|ip", operands
);
17878 else if (regno_use_in (LR_REGNUM
, operands
[0]))
17880 /* LR is used in the memory address. We load the address in the
17881 first instruction. It's safe to use IP as the target of the
17882 load since the call will kill it anyway. */
17883 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
17884 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17886 output_asm_insn ("bx%?\t%|ip", operands
);
17888 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
17892 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17893 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
17900 /* Output a move from arm registers to arm registers of a long double
17901 OPERANDS[0] is the destination.
17902 OPERANDS[1] is the source. */
17904 output_mov_long_double_arm_from_arm (rtx
*operands
)
17906 /* We have to be careful here because the two might overlap. */
17907 int dest_start
= REGNO (operands
[0]);
17908 int src_start
= REGNO (operands
[1]);
17912 if (dest_start
< src_start
)
17914 for (i
= 0; i
< 3; i
++)
17916 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17917 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17918 output_asm_insn ("mov%?\t%0, %1", ops
);
17923 for (i
= 2; i
>= 0; i
--)
17925 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17926 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17927 output_asm_insn ("mov%?\t%0, %1", ops
);
17935 arm_emit_movpair (rtx dest
, rtx src
)
17939 /* If the src is an immediate, simplify it. */
17940 if (CONST_INT_P (src
))
17942 HOST_WIDE_INT val
= INTVAL (src
);
17943 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17944 if ((val
>> 16) & 0x0000ffff)
17946 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17948 GEN_INT ((val
>> 16) & 0x0000ffff));
17949 insn
= get_last_insn ();
17950 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17954 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17955 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17956 insn
= get_last_insn ();
17957 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17960 /* Output a move between double words. It must be REG<-MEM
17963 output_move_double (rtx
*operands
, bool emit
, int *count
)
17965 enum rtx_code code0
= GET_CODE (operands
[0]);
17966 enum rtx_code code1
= GET_CODE (operands
[1]);
17971 /* The only case when this might happen is when
17972 you are looking at the length of a DImode instruction
17973 that has an invalid constant in it. */
17974 if (code0
== REG
&& code1
!= MEM
)
17976 gcc_assert (!emit
);
17983 unsigned int reg0
= REGNO (operands
[0]);
17985 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17987 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17989 switch (GET_CODE (XEXP (operands
[1], 0)))
17996 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17997 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
17999 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
18004 gcc_assert (TARGET_LDRD
);
18006 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
18013 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
18015 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
18023 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
18025 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
18030 gcc_assert (TARGET_LDRD
);
18032 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
18037 /* Autoicrement addressing modes should never have overlapping
18038 base and destination registers, and overlapping index registers
18039 are already prohibited, so this doesn't need to worry about
18041 otherops
[0] = operands
[0];
18042 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18043 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18045 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18047 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18049 /* Registers overlap so split out the increment. */
18052 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18053 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
18060 /* Use a single insn if we can.
18061 FIXME: IWMMXT allows offsets larger than ldrd can
18062 handle, fix these up with a pair of ldr. */
18064 || !CONST_INT_P (otherops
[2])
18065 || (INTVAL (otherops
[2]) > -256
18066 && INTVAL (otherops
[2]) < 256))
18069 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
18075 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18076 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18086 /* Use a single insn if we can.
18087 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18088 fix these up with a pair of ldr. */
18090 || !CONST_INT_P (otherops
[2])
18091 || (INTVAL (otherops
[2]) > -256
18092 && INTVAL (otherops
[2]) < 256))
18095 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
18101 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18102 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18112 /* We might be able to use ldrd %0, %1 here. However the range is
18113 different to ldr/adr, and it is broken on some ARMv7-M
18114 implementations. */
18115 /* Use the second register of the pair to avoid problematic
18117 otherops
[1] = operands
[1];
18119 output_asm_insn ("adr%?\t%0, %1", otherops
);
18120 operands
[1] = otherops
[0];
18124 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18126 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
18133 /* ??? This needs checking for thumb2. */
18135 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18136 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18138 otherops
[0] = operands
[0];
18139 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18140 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18142 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18144 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18146 switch ((int) INTVAL (otherops
[2]))
18150 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
18156 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
18162 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
18166 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18167 operands
[1] = otherops
[0];
18169 && (REG_P (otherops
[2])
18171 || (CONST_INT_P (otherops
[2])
18172 && INTVAL (otherops
[2]) > -256
18173 && INTVAL (otherops
[2]) < 256)))
18175 if (reg_overlap_mentioned_p (operands
[0],
18178 /* Swap base and index registers over to
18179 avoid a conflict. */
18180 std::swap (otherops
[1], otherops
[2]);
18182 /* If both registers conflict, it will usually
18183 have been fixed by a splitter. */
18184 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18185 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18189 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18190 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
18197 otherops
[0] = operands
[0];
18199 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
18204 if (CONST_INT_P (otherops
[2]))
18208 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18209 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18211 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18217 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18223 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18230 return "ldr%(d%)\t%0, [%1]";
18232 return "ldm%(ia%)\t%1, %M0";
18236 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18237 /* Take care of overlapping base/data reg. */
18238 if (reg_mentioned_p (operands
[0], operands
[1]))
18242 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18243 output_asm_insn ("ldr%?\t%0, %1", operands
);
18253 output_asm_insn ("ldr%?\t%0, %1", operands
);
18254 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18264 /* Constraints should ensure this. */
18265 gcc_assert (code0
== MEM
&& code1
== REG
);
18266 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18267 || (TARGET_ARM
&& TARGET_LDRD
));
18269 switch (GET_CODE (XEXP (operands
[0], 0)))
18275 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
18277 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18282 gcc_assert (TARGET_LDRD
);
18284 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
18291 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
18293 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
18301 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
18303 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
18308 gcc_assert (TARGET_LDRD
);
18310 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
18315 otherops
[0] = operands
[1];
18316 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18317 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18319 /* IWMMXT allows offsets larger than ldrd can handle,
18320 fix these up with a pair of ldr. */
18322 && CONST_INT_P (otherops
[2])
18323 && (INTVAL(otherops
[2]) <= -256
18324 || INTVAL(otherops
[2]) >= 256))
18326 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18330 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18331 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18340 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18341 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18347 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18350 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
18355 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
18360 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18361 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18363 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18367 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
18374 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
18381 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
18386 && (REG_P (otherops
[2])
18388 || (CONST_INT_P (otherops
[2])
18389 && INTVAL (otherops
[2]) > -256
18390 && INTVAL (otherops
[2]) < 256)))
18392 otherops
[0] = operands
[1];
18393 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18395 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
18401 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18402 otherops
[1] = operands
[1];
18405 output_asm_insn ("str%?\t%1, %0", operands
);
18406 output_asm_insn ("str%?\t%H1, %0", otherops
);
18416 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18417 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18420 output_move_quad (rtx
*operands
)
18422 if (REG_P (operands
[0]))
18424 /* Load, or reg->reg move. */
18426 if (MEM_P (operands
[1]))
18428 switch (GET_CODE (XEXP (operands
[1], 0)))
18431 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
18436 output_asm_insn ("adr%?\t%0, %1", operands
);
18437 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
18441 gcc_unreachable ();
18449 gcc_assert (REG_P (operands
[1]));
18451 dest
= REGNO (operands
[0]);
18452 src
= REGNO (operands
[1]);
18454 /* This seems pretty dumb, but hopefully GCC won't try to do it
18457 for (i
= 0; i
< 4; i
++)
18459 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18460 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18461 output_asm_insn ("mov%?\t%0, %1", ops
);
18464 for (i
= 3; i
>= 0; i
--)
18466 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18467 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18468 output_asm_insn ("mov%?\t%0, %1", ops
);
18474 gcc_assert (MEM_P (operands
[0]));
18475 gcc_assert (REG_P (operands
[1]));
18476 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18478 switch (GET_CODE (XEXP (operands
[0], 0)))
18481 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
18485 gcc_unreachable ();
18492 /* Output a VFP load or store instruction. */
18495 output_move_vfp (rtx
*operands
)
18497 rtx reg
, mem
, addr
, ops
[2];
18498 int load
= REG_P (operands
[0]);
18499 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18500 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18505 reg
= operands
[!load
];
18506 mem
= operands
[load
];
18508 mode
= GET_MODE (reg
);
18510 gcc_assert (REG_P (reg
));
18511 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18512 gcc_assert (mode
== SFmode
18516 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18517 gcc_assert (MEM_P (mem
));
18519 addr
= XEXP (mem
, 0);
18521 switch (GET_CODE (addr
))
18524 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18525 ops
[0] = XEXP (addr
, 0);
18530 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18531 ops
[0] = XEXP (addr
, 0);
18536 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18542 sprintf (buff
, templ
,
18543 load
? "ld" : "st",
18546 integer_p
? "\t%@ int" : "");
18547 output_asm_insn (buff
, ops
);
18552 /* Output a Neon double-word or quad-word load or store, or a load
18553 or store for larger structure modes.
18555 WARNING: The ordering of elements is weird in big-endian mode,
18556 because the EABI requires that vectors stored in memory appear
18557 as though they were stored by a VSTM, as required by the EABI.
18558 GCC RTL defines element ordering based on in-memory order.
18559 This can be different from the architectural ordering of elements
18560 within a NEON register. The intrinsics defined in arm_neon.h use the
18561 NEON register element ordering, not the GCC RTL element ordering.
18563 For example, the in-memory ordering of a big-endian a quadword
18564 vector with 16-bit elements when stored from register pair {d0,d1}
18565 will be (lowest address first, d0[N] is NEON register element N):
18567 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18569 When necessary, quadword registers (dN, dN+1) are moved to ARM
18570 registers from rN in the order:
18572 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18574 So that STM/LDM can be used on vectors in ARM registers, and the
18575 same memory layout will result as if VSTM/VLDM were used.
18577 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18578 possible, which allows use of appropriate alignment tags.
18579 Note that the choice of "64" is independent of the actual vector
18580 element size; this size simply ensures that the behavior is
18581 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18583 Due to limitations of those instructions, use of VST1.64/VLD1.64
18584 is not possible if:
18585 - the address contains PRE_DEC, or
18586 - the mode refers to more than 4 double-word registers
18588 In those cases, it would be possible to replace VSTM/VLDM by a
18589 sequence of instructions; this is not currently implemented since
18590 this is not certain to actually improve performance. */
18593 output_move_neon (rtx
*operands
)
18595 rtx reg
, mem
, addr
, ops
[2];
18596 int regno
, nregs
, load
= REG_P (operands
[0]);
18601 reg
= operands
[!load
];
18602 mem
= operands
[load
];
18604 mode
= GET_MODE (reg
);
18606 gcc_assert (REG_P (reg
));
18607 regno
= REGNO (reg
);
18608 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18609 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18610 || NEON_REGNO_OK_FOR_QUAD (regno
));
18611 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18612 || VALID_NEON_QREG_MODE (mode
)
18613 || VALID_NEON_STRUCT_MODE (mode
));
18614 gcc_assert (MEM_P (mem
));
18616 addr
= XEXP (mem
, 0);
18618 /* Strip off const from addresses like (const (plus (...))). */
18619 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18620 addr
= XEXP (addr
, 0);
18622 switch (GET_CODE (addr
))
18625 /* We have to use vldm / vstm for too-large modes. */
18628 templ
= "v%smia%%?\t%%0!, %%h1";
18629 ops
[0] = XEXP (addr
, 0);
18633 templ
= "v%s1.64\t%%h1, %%A0";
18640 /* We have to use vldm / vstm in this case, since there is no
18641 pre-decrement form of the vld1 / vst1 instructions. */
18642 templ
= "v%smdb%%?\t%%0!, %%h1";
18643 ops
[0] = XEXP (addr
, 0);
18648 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18649 gcc_unreachable ();
18652 /* We have to use vldm / vstm for too-large modes. */
18656 templ
= "v%smia%%?\t%%m0, %%h1";
18658 templ
= "v%s1.64\t%%h1, %%A0";
18664 /* Fall through. */
18670 for (i
= 0; i
< nregs
; i
++)
18672 /* We're only using DImode here because it's a convenient size. */
18673 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18674 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18675 if (reg_overlap_mentioned_p (ops
[0], mem
))
18677 gcc_assert (overlap
== -1);
18682 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18683 output_asm_insn (buff
, ops
);
18688 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18689 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18690 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18691 output_asm_insn (buff
, ops
);
18698 gcc_unreachable ();
18701 sprintf (buff
, templ
, load
? "ld" : "st");
18702 output_asm_insn (buff
, ops
);
18707 /* Compute and return the length of neon_mov<mode>, where <mode> is
18708 one of VSTRUCT modes: EI, OI, CI or XI. */
18710 arm_attr_length_move_neon (rtx_insn
*insn
)
18712 rtx reg
, mem
, addr
;
18716 extract_insn_cached (insn
);
18718 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18720 mode
= GET_MODE (recog_data
.operand
[0]);
18731 gcc_unreachable ();
18735 load
= REG_P (recog_data
.operand
[0]);
18736 reg
= recog_data
.operand
[!load
];
18737 mem
= recog_data
.operand
[load
];
18739 gcc_assert (MEM_P (mem
));
18741 mode
= GET_MODE (reg
);
18742 addr
= XEXP (mem
, 0);
18744 /* Strip off const from addresses like (const (plus (...))). */
18745 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18746 addr
= XEXP (addr
, 0);
18748 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18750 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18757 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18761 arm_address_offset_is_imm (rtx_insn
*insn
)
18765 extract_insn_cached (insn
);
18767 if (REG_P (recog_data
.operand
[0]))
18770 mem
= recog_data
.operand
[0];
18772 gcc_assert (MEM_P (mem
));
18774 addr
= XEXP (mem
, 0);
18777 || (GET_CODE (addr
) == PLUS
18778 && REG_P (XEXP (addr
, 0))
18779 && CONST_INT_P (XEXP (addr
, 1))))
18785 /* Output an ADD r, s, #n where n may be too big for one instruction.
18786 If adding zero to one register, output nothing. */
18788 output_add_immediate (rtx
*operands
)
18790 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18792 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18795 output_multi_immediate (operands
,
18796 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18799 output_multi_immediate (operands
,
18800 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18807 /* Output a multiple immediate operation.
18808 OPERANDS is the vector of operands referred to in the output patterns.
18809 INSTR1 is the output pattern to use for the first constant.
18810 INSTR2 is the output pattern to use for subsequent constants.
18811 IMMED_OP is the index of the constant slot in OPERANDS.
18812 N is the constant value. */
18813 static const char *
18814 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18815 int immed_op
, HOST_WIDE_INT n
)
18817 #if HOST_BITS_PER_WIDE_INT > 32
18823 /* Quick and easy output. */
18824 operands
[immed_op
] = const0_rtx
;
18825 output_asm_insn (instr1
, operands
);
18830 const char * instr
= instr1
;
18832 /* Note that n is never zero here (which would give no output). */
18833 for (i
= 0; i
< 32; i
+= 2)
18837 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18838 output_asm_insn (instr
, operands
);
18848 /* Return the name of a shifter operation. */
18849 static const char *
18850 arm_shift_nmem(enum rtx_code code
)
18855 return ARM_LSL_NAME
;
18871 /* Return the appropriate ARM instruction for the operation code.
18872 The returned result should not be overwritten. OP is the rtx of the
18873 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18876 arithmetic_instr (rtx op
, int shift_first_arg
)
18878 switch (GET_CODE (op
))
18884 return shift_first_arg
? "rsb" : "sub";
18899 return arm_shift_nmem(GET_CODE(op
));
18902 gcc_unreachable ();
18906 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18907 for the operation code. The returned result should not be overwritten.
18908 OP is the rtx code of the shift.
18909 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18911 static const char *
18912 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18915 enum rtx_code code
= GET_CODE (op
);
18920 if (!CONST_INT_P (XEXP (op
, 1)))
18922 output_operand_lossage ("invalid shift operand");
18927 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18935 mnem
= arm_shift_nmem(code
);
18936 if (CONST_INT_P (XEXP (op
, 1)))
18938 *amountp
= INTVAL (XEXP (op
, 1));
18940 else if (REG_P (XEXP (op
, 1)))
18947 output_operand_lossage ("invalid shift operand");
18953 /* We never have to worry about the amount being other than a
18954 power of 2, since this case can never be reloaded from a reg. */
18955 if (!CONST_INT_P (XEXP (op
, 1)))
18957 output_operand_lossage ("invalid shift operand");
18961 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18963 /* Amount must be a power of two. */
18964 if (*amountp
& (*amountp
- 1))
18966 output_operand_lossage ("invalid shift operand");
18970 *amountp
= int_log2 (*amountp
);
18971 return ARM_LSL_NAME
;
18974 output_operand_lossage ("invalid shift operand");
18978 /* This is not 100% correct, but follows from the desire to merge
18979 multiplication by a power of 2 with the recognizer for a
18980 shift. >=32 is not a valid shift for "lsl", so we must try and
18981 output a shift that produces the correct arithmetical result.
18982 Using lsr #32 is identical except for the fact that the carry bit
18983 is not set correctly if we set the flags; but we never use the
18984 carry bit from such an operation, so we can ignore that. */
18985 if (code
== ROTATERT
)
18986 /* Rotate is just modulo 32. */
18988 else if (*amountp
!= (*amountp
& 31))
18990 if (code
== ASHIFT
)
18995 /* Shifts of 0 are no-ops. */
19002 /* Obtain the shift from the POWER of two. */
19004 static HOST_WIDE_INT
19005 int_log2 (HOST_WIDE_INT power
)
19007 HOST_WIDE_INT shift
= 0;
19009 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
19011 gcc_assert (shift
<= 31);
19018 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19019 because /bin/as is horribly restrictive. The judgement about
19020 whether or not each character is 'printable' (and can be output as
19021 is) or not (and must be printed with an octal escape) must be made
19022 with reference to the *host* character set -- the situation is
19023 similar to that discussed in the comments above pp_c_char in
19024 c-pretty-print.c. */
19026 #define MAX_ASCII_LEN 51
19029 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
19032 int len_so_far
= 0;
19034 fputs ("\t.ascii\t\"", stream
);
19036 for (i
= 0; i
< len
; i
++)
19040 if (len_so_far
>= MAX_ASCII_LEN
)
19042 fputs ("\"\n\t.ascii\t\"", stream
);
19048 if (c
== '\\' || c
== '\"')
19050 putc ('\\', stream
);
19058 fprintf (stream
, "\\%03o", c
);
19063 fputs ("\"\n", stream
);
19066 /* Whether a register is callee saved or not. This is necessary because high
19067 registers are marked as caller saved when optimizing for size on Thumb-1
19068 targets despite being callee saved in order to avoid using them. */
19069 #define callee_saved_reg_p(reg) \
19070 (!call_used_regs[reg] \
19071 || (TARGET_THUMB1 && optimize_size \
19072 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19074 /* Compute the register save mask for registers 0 through 12
19075 inclusive. This code is used by arm_compute_save_reg_mask. */
19077 static unsigned long
19078 arm_compute_save_reg0_reg12_mask (void)
19080 unsigned long func_type
= arm_current_func_type ();
19081 unsigned long save_reg_mask
= 0;
19084 if (IS_INTERRUPT (func_type
))
19086 unsigned int max_reg
;
19087 /* Interrupt functions must not corrupt any registers,
19088 even call clobbered ones. If this is a leaf function
19089 we can just examine the registers used by the RTL, but
19090 otherwise we have to assume that whatever function is
19091 called might clobber anything, and so we have to save
19092 all the call-clobbered registers as well. */
19093 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19094 /* FIQ handlers have registers r8 - r12 banked, so
19095 we only need to check r0 - r7, Normal ISRs only
19096 bank r14 and r15, so we must check up to r12.
19097 r13 is the stack pointer which is always preserved,
19098 so we do not need to consider it here. */
19103 for (reg
= 0; reg
<= max_reg
; reg
++)
19104 if (df_regs_ever_live_p (reg
)
19105 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19106 save_reg_mask
|= (1 << reg
);
19108 /* Also save the pic base register if necessary. */
19110 && !TARGET_SINGLE_PIC_BASE
19111 && arm_pic_register
!= INVALID_REGNUM
19112 && crtl
->uses_pic_offset_table
)
19113 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19115 else if (IS_VOLATILE(func_type
))
19117 /* For noreturn functions we historically omitted register saves
19118 altogether. However this really messes up debugging. As a
19119 compromise save just the frame pointers. Combined with the link
19120 register saved elsewhere this should be sufficient to get
19122 if (frame_pointer_needed
)
19123 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19124 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19125 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19126 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19127 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19131 /* In the normal case we only need to save those registers
19132 which are call saved and which are used by this function. */
19133 for (reg
= 0; reg
<= 11; reg
++)
19134 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19135 save_reg_mask
|= (1 << reg
);
19137 /* Handle the frame pointer as a special case. */
19138 if (frame_pointer_needed
)
19139 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19141 /* If we aren't loading the PIC register,
19142 don't stack it even though it may be live. */
19144 && !TARGET_SINGLE_PIC_BASE
19145 && arm_pic_register
!= INVALID_REGNUM
19146 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19147 || crtl
->uses_pic_offset_table
))
19148 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19150 /* The prologue will copy SP into R0, so save it. */
19151 if (IS_STACKALIGN (func_type
))
19152 save_reg_mask
|= 1;
19155 /* Save registers so the exception handler can modify them. */
19156 if (crtl
->calls_eh_return
)
19162 reg
= EH_RETURN_DATA_REGNO (i
);
19163 if (reg
== INVALID_REGNUM
)
19165 save_reg_mask
|= 1 << reg
;
19169 return save_reg_mask
;
19172 /* Return true if r3 is live at the start of the function. */
19175 arm_r3_live_at_start_p (void)
19177 /* Just look at cfg info, which is still close enough to correct at this
19178 point. This gives false positives for broken functions that might use
19179 uninitialized data that happens to be allocated in r3, but who cares? */
19180 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19183 /* Compute the number of bytes used to store the static chain register on the
19184 stack, above the stack frame. We need to know this accurately to get the
19185 alignment of the rest of the stack frame correct. */
19188 arm_compute_static_chain_stack_bytes (void)
19190 /* See the defining assertion in arm_expand_prologue. */
19191 if (IS_NESTED (arm_current_func_type ())
19192 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19193 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19194 && !df_regs_ever_live_p (LR_REGNUM
)))
19195 && arm_r3_live_at_start_p ()
19196 && crtl
->args
.pretend_args_size
== 0)
19202 /* Compute a bit mask of which registers need to be
19203 saved on the stack for the current function.
19204 This is used by arm_get_frame_offsets, which may add extra registers. */
19206 static unsigned long
19207 arm_compute_save_reg_mask (void)
19209 unsigned int save_reg_mask
= 0;
19210 unsigned long func_type
= arm_current_func_type ();
19213 if (IS_NAKED (func_type
))
19214 /* This should never really happen. */
19217 /* If we are creating a stack frame, then we must save the frame pointer,
19218 IP (which will hold the old stack pointer), LR and the PC. */
19219 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19221 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19224 | (1 << PC_REGNUM
);
19226 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19228 /* Decide if we need to save the link register.
19229 Interrupt routines have their own banked link register,
19230 so they never need to save it.
19231 Otherwise if we do not use the link register we do not need to save
19232 it. If we are pushing other registers onto the stack however, we
19233 can save an instruction in the epilogue by pushing the link register
19234 now and then popping it back into the PC. This incurs extra memory
19235 accesses though, so we only do it when optimizing for size, and only
19236 if we know that we will not need a fancy return sequence. */
19237 if (df_regs_ever_live_p (LR_REGNUM
)
19240 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19241 && !crtl
->tail_call_emit
19242 && !crtl
->calls_eh_return
))
19243 save_reg_mask
|= 1 << LR_REGNUM
;
19245 if (cfun
->machine
->lr_save_eliminated
)
19246 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19248 if (TARGET_REALLY_IWMMXT
19249 && ((bit_count (save_reg_mask
)
19250 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19251 arm_compute_static_chain_stack_bytes())
19254 /* The total number of registers that are going to be pushed
19255 onto the stack is odd. We need to ensure that the stack
19256 is 64-bit aligned before we start to save iWMMXt registers,
19257 and also before we start to create locals. (A local variable
19258 might be a double or long long which we will load/store using
19259 an iWMMXt instruction). Therefore we need to push another
19260 ARM register, so that the stack will be 64-bit aligned. We
19261 try to avoid using the arg registers (r0 -r3) as they might be
19262 used to pass values in a tail call. */
19263 for (reg
= 4; reg
<= 12; reg
++)
19264 if ((save_reg_mask
& (1 << reg
)) == 0)
19268 save_reg_mask
|= (1 << reg
);
19271 cfun
->machine
->sibcall_blocked
= 1;
19272 save_reg_mask
|= (1 << 3);
19276 /* We may need to push an additional register for use initializing the
19277 PIC base register. */
19278 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19279 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19281 reg
= thumb_find_work_register (1 << 4);
19282 if (!call_used_regs
[reg
])
19283 save_reg_mask
|= (1 << reg
);
19286 return save_reg_mask
;
19289 /* Compute a bit mask of which registers need to be
19290 saved on the stack for the current function. */
19291 static unsigned long
19292 thumb1_compute_save_reg_mask (void)
19294 unsigned long mask
;
19298 for (reg
= 0; reg
< 12; reg
++)
19299 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19303 && !TARGET_SINGLE_PIC_BASE
19304 && arm_pic_register
!= INVALID_REGNUM
19305 && crtl
->uses_pic_offset_table
)
19306 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19308 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19309 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19310 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19312 /* LR will also be pushed if any lo regs are pushed. */
19313 if (mask
& 0xff || thumb_force_lr_save ())
19314 mask
|= (1 << LR_REGNUM
);
19316 /* Make sure we have a low work register if we need one.
19317 We will need one if we are going to push a high register,
19318 but we are not currently intending to push a low register. */
19319 if ((mask
& 0xff) == 0
19320 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19322 /* Use thumb_find_work_register to choose which register
19323 we will use. If the register is live then we will
19324 have to push it. Use LAST_LO_REGNUM as our fallback
19325 choice for the register to select. */
19326 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19327 /* Make sure the register returned by thumb_find_work_register is
19328 not part of the return value. */
19329 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19330 reg
= LAST_LO_REGNUM
;
19332 if (callee_saved_reg_p (reg
))
19336 /* The 504 below is 8 bytes less than 512 because there are two possible
19337 alignment words. We can't tell here if they will be present or not so we
19338 have to play it safe and assume that they are. */
19339 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19340 ROUND_UP_WORD (get_frame_size ()) +
19341 crtl
->outgoing_args_size
) >= 504)
19343 /* This is the same as the code in thumb1_expand_prologue() which
19344 determines which register to use for stack decrement. */
19345 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19346 if (mask
& (1 << reg
))
19349 if (reg
> LAST_LO_REGNUM
)
19351 /* Make sure we have a register available for stack decrement. */
19352 mask
|= 1 << LAST_LO_REGNUM
;
19360 /* Return the number of bytes required to save VFP registers. */
19362 arm_get_vfp_saved_size (void)
19364 unsigned int regno
;
19369 /* Space for saved VFP registers. */
19370 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
19373 for (regno
= FIRST_VFP_REGNUM
;
19374 regno
< LAST_VFP_REGNUM
;
19377 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19378 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19382 /* Workaround ARM10 VFPr1 bug. */
19383 if (count
== 2 && !arm_arch6
)
19385 saved
+= count
* 8;
19394 if (count
== 2 && !arm_arch6
)
19396 saved
+= count
* 8;
19403 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19404 everything bar the final return instruction. If simple_return is true,
19405 then do not output epilogue, because it has already been emitted in RTL. */
19407 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19408 bool simple_return
)
19410 char conditional
[10];
19413 unsigned long live_regs_mask
;
19414 unsigned long func_type
;
19415 arm_stack_offsets
*offsets
;
19417 func_type
= arm_current_func_type ();
19419 if (IS_NAKED (func_type
))
19422 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19424 /* If this function was declared non-returning, and we have
19425 found a tail call, then we have to trust that the called
19426 function won't return. */
19431 /* Otherwise, trap an attempted return by aborting. */
19433 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19435 assemble_external_libcall (ops
[1]);
19436 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19442 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19444 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19446 cfun
->machine
->return_used_this_function
= 1;
19448 offsets
= arm_get_frame_offsets ();
19449 live_regs_mask
= offsets
->saved_regs_mask
;
19451 if (!simple_return
&& live_regs_mask
)
19453 const char * return_reg
;
19455 /* If we do not have any special requirements for function exit
19456 (e.g. interworking) then we can load the return address
19457 directly into the PC. Otherwise we must load it into LR. */
19459 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19460 return_reg
= reg_names
[PC_REGNUM
];
19462 return_reg
= reg_names
[LR_REGNUM
];
19464 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19466 /* There are three possible reasons for the IP register
19467 being saved. 1) a stack frame was created, in which case
19468 IP contains the old stack pointer, or 2) an ISR routine
19469 corrupted it, or 3) it was saved to align the stack on
19470 iWMMXt. In case 1, restore IP into SP, otherwise just
19472 if (frame_pointer_needed
)
19474 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19475 live_regs_mask
|= (1 << SP_REGNUM
);
19478 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19481 /* On some ARM architectures it is faster to use LDR rather than
19482 LDM to load a single register. On other architectures, the
19483 cost is the same. In 26 bit mode, or for exception handlers,
19484 we have to use LDM to load the PC so that the CPSR is also
19486 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19487 if (live_regs_mask
== (1U << reg
))
19490 if (reg
<= LAST_ARM_REGNUM
19491 && (reg
!= LR_REGNUM
19493 || ! IS_INTERRUPT (func_type
)))
19495 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19496 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19503 /* Generate the load multiple instruction to restore the
19504 registers. Note we can get here, even if
19505 frame_pointer_needed is true, but only if sp already
19506 points to the base of the saved core registers. */
19507 if (live_regs_mask
& (1 << SP_REGNUM
))
19509 unsigned HOST_WIDE_INT stack_adjust
;
19511 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19512 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19514 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19515 if (TARGET_UNIFIED_ASM
)
19516 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19518 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
19521 /* If we can't use ldmib (SA110 bug),
19522 then try to pop r3 instead. */
19524 live_regs_mask
|= 1 << 3;
19526 if (TARGET_UNIFIED_ASM
)
19527 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19529 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
19533 if (TARGET_UNIFIED_ASM
)
19534 sprintf (instr
, "pop%s\t{", conditional
);
19536 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
19538 p
= instr
+ strlen (instr
);
19540 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19541 if (live_regs_mask
& (1 << reg
))
19543 int l
= strlen (reg_names
[reg
]);
19549 memcpy (p
, ", ", 2);
19553 memcpy (p
, "%|", 2);
19554 memcpy (p
+ 2, reg_names
[reg
], l
);
19558 if (live_regs_mask
& (1 << LR_REGNUM
))
19560 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19561 /* If returning from an interrupt, restore the CPSR. */
19562 if (IS_INTERRUPT (func_type
))
19569 output_asm_insn (instr
, & operand
);
19571 /* See if we need to generate an extra instruction to
19572 perform the actual function return. */
19574 && func_type
!= ARM_FT_INTERWORKED
19575 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19577 /* The return has already been handled
19578 by loading the LR into the PC. */
19585 switch ((int) ARM_FUNC_TYPE (func_type
))
19589 /* ??? This is wrong for unified assembly syntax. */
19590 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19593 case ARM_FT_INTERWORKED
:
19594 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19597 case ARM_FT_EXCEPTION
:
19598 /* ??? This is wrong for unified assembly syntax. */
19599 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19603 /* Use bx if it's available. */
19604 if (arm_arch5
|| arm_arch4t
)
19605 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19607 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19611 output_asm_insn (instr
, & operand
);
19617 /* Write the function name into the code section, directly preceding
19618 the function prologue.
19620 Code will be output similar to this:
19622 .ascii "arm_poke_function_name", 0
19625 .word 0xff000000 + (t1 - t0)
19626 arm_poke_function_name
19628 stmfd sp!, {fp, ip, lr, pc}
19631 When performing a stack backtrace, code can inspect the value
19632 of 'pc' stored at 'fp' + 0. If the trace function then looks
19633 at location pc - 12 and the top 8 bits are set, then we know
19634 that there is a function name embedded immediately preceding this
19635 location and has length ((pc[-3]) & 0xff000000).
19637 We assume that pc is declared as a pointer to an unsigned long.
19639 It is of no benefit to output the function name if we are assembling
19640 a leaf function. These function types will not contain a stack
19641 backtrace structure, therefore it is not possible to determine the
19644 arm_poke_function_name (FILE *stream
, const char *name
)
19646 unsigned long alignlength
;
19647 unsigned long length
;
19650 length
= strlen (name
) + 1;
19651 alignlength
= ROUND_UP_WORD (length
);
19653 ASM_OUTPUT_ASCII (stream
, name
, length
);
19654 ASM_OUTPUT_ALIGN (stream
, 2);
19655 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19656 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19659 /* Place some comments into the assembler stream
19660 describing the current function. */
19662 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19664 unsigned long func_type
;
19666 /* ??? Do we want to print some of the below anyway? */
19670 /* Sanity check. */
19671 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19673 func_type
= arm_current_func_type ();
19675 switch ((int) ARM_FUNC_TYPE (func_type
))
19678 case ARM_FT_NORMAL
:
19680 case ARM_FT_INTERWORKED
:
19681 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19684 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19687 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19689 case ARM_FT_EXCEPTION
:
19690 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19694 if (IS_NAKED (func_type
))
19695 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19697 if (IS_VOLATILE (func_type
))
19698 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19700 if (IS_NESTED (func_type
))
19701 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19702 if (IS_STACKALIGN (func_type
))
19703 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19705 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19707 crtl
->args
.pretend_args_size
, frame_size
);
19709 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19710 frame_pointer_needed
,
19711 cfun
->machine
->uses_anonymous_args
);
19713 if (cfun
->machine
->lr_save_eliminated
)
19714 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19716 if (crtl
->calls_eh_return
)
19717 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19722 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19723 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19725 arm_stack_offsets
*offsets
;
19731 /* Emit any call-via-reg trampolines that are needed for v4t support
19732 of call_reg and call_value_reg type insns. */
19733 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19735 rtx label
= cfun
->machine
->call_via
[regno
];
19739 switch_to_section (function_section (current_function_decl
));
19740 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19741 CODE_LABEL_NUMBER (label
));
19742 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19746 /* ??? Probably not safe to set this here, since it assumes that a
19747 function will be emitted as assembly immediately after we generate
19748 RTL for it. This does not happen for inline functions. */
19749 cfun
->machine
->return_used_this_function
= 0;
19751 else /* TARGET_32BIT */
19753 /* We need to take into account any stack-frame rounding. */
19754 offsets
= arm_get_frame_offsets ();
19756 gcc_assert (!use_return_insn (FALSE
, NULL
)
19757 || (cfun
->machine
->return_used_this_function
!= 0)
19758 || offsets
->saved_regs
== offsets
->outgoing_args
19759 || frame_pointer_needed
);
19763 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19764 STR and STRD. If an even number of registers are being pushed, one
19765 or more STRD patterns are created for each register pair. If an
19766 odd number of registers are pushed, emit an initial STR followed by
19767 as many STRD instructions as are needed. This works best when the
19768 stack is initially 64-bit aligned (the normal case), since it
19769 ensures that each STRD is also 64-bit aligned. */
19771 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19776 rtx par
= NULL_RTX
;
19777 rtx dwarf
= NULL_RTX
;
19781 num_regs
= bit_count (saved_regs_mask
);
19783 /* Must be at least one register to save, and can't save SP or PC. */
19784 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19785 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19786 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19788 /* Create sequence for DWARF info. All the frame-related data for
19789 debugging is held in this wrapper. */
19790 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19792 /* Describe the stack adjustment. */
19793 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19794 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19795 RTX_FRAME_RELATED_P (tmp
) = 1;
19796 XVECEXP (dwarf
, 0, 0) = tmp
;
19798 /* Find the first register. */
19799 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19804 /* If there's an odd number of registers to push. Start off by
19805 pushing a single register. This ensures that subsequent strd
19806 operations are dword aligned (assuming that SP was originally
19807 64-bit aligned). */
19808 if ((num_regs
& 1) != 0)
19810 rtx reg
, mem
, insn
;
19812 reg
= gen_rtx_REG (SImode
, regno
);
19814 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19815 stack_pointer_rtx
));
19817 mem
= gen_frame_mem (Pmode
,
19819 (Pmode
, stack_pointer_rtx
,
19820 plus_constant (Pmode
, stack_pointer_rtx
,
19823 tmp
= gen_rtx_SET (mem
, reg
);
19824 RTX_FRAME_RELATED_P (tmp
) = 1;
19825 insn
= emit_insn (tmp
);
19826 RTX_FRAME_RELATED_P (insn
) = 1;
19827 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19828 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19829 RTX_FRAME_RELATED_P (tmp
) = 1;
19832 XVECEXP (dwarf
, 0, i
) = tmp
;
19836 while (i
< num_regs
)
19837 if (saved_regs_mask
& (1 << regno
))
19839 rtx reg1
, reg2
, mem1
, mem2
;
19840 rtx tmp0
, tmp1
, tmp2
;
19843 /* Find the register to pair with this one. */
19844 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19848 reg1
= gen_rtx_REG (SImode
, regno
);
19849 reg2
= gen_rtx_REG (SImode
, regno2
);
19856 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19859 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19861 -4 * (num_regs
- 1)));
19862 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19863 plus_constant (Pmode
, stack_pointer_rtx
,
19865 tmp1
= gen_rtx_SET (mem1
, reg1
);
19866 tmp2
= gen_rtx_SET (mem2
, reg2
);
19867 RTX_FRAME_RELATED_P (tmp0
) = 1;
19868 RTX_FRAME_RELATED_P (tmp1
) = 1;
19869 RTX_FRAME_RELATED_P (tmp2
) = 1;
19870 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19871 XVECEXP (par
, 0, 0) = tmp0
;
19872 XVECEXP (par
, 0, 1) = tmp1
;
19873 XVECEXP (par
, 0, 2) = tmp2
;
19874 insn
= emit_insn (par
);
19875 RTX_FRAME_RELATED_P (insn
) = 1;
19876 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19880 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19883 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19886 tmp1
= gen_rtx_SET (mem1
, reg1
);
19887 tmp2
= gen_rtx_SET (mem2
, reg2
);
19888 RTX_FRAME_RELATED_P (tmp1
) = 1;
19889 RTX_FRAME_RELATED_P (tmp2
) = 1;
19890 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19891 XVECEXP (par
, 0, 0) = tmp1
;
19892 XVECEXP (par
, 0, 1) = tmp2
;
19896 /* Create unwind information. This is an approximation. */
19897 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
19898 plus_constant (Pmode
,
19902 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
19903 plus_constant (Pmode
,
19908 RTX_FRAME_RELATED_P (tmp1
) = 1;
19909 RTX_FRAME_RELATED_P (tmp2
) = 1;
19910 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19911 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19913 regno
= regno2
+ 1;
19921 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19922 whenever possible, otherwise it emits single-word stores. The first store
19923 also allocates stack space for all saved registers, using writeback with
19924 post-addressing mode. All other stores use offset addressing. If no STRD
19925 can be emitted, this function emits a sequence of single-word stores,
19926 and not an STM as before, because single-word stores provide more freedom
19927 scheduling and can be turned into an STM by peephole optimizations. */
19929 arm_emit_strd_push (unsigned long saved_regs_mask
)
19932 int i
, j
, dwarf_index
= 0;
19934 rtx dwarf
= NULL_RTX
;
19935 rtx insn
= NULL_RTX
;
19938 /* TODO: A more efficient code can be emitted by changing the
19939 layout, e.g., first push all pairs that can use STRD to keep the
19940 stack aligned, and then push all other registers. */
19941 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19942 if (saved_regs_mask
& (1 << i
))
19945 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19946 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19947 gcc_assert (num_regs
> 0);
19949 /* Create sequence for DWARF info. */
19950 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19952 /* For dwarf info, we generate explicit stack update. */
19953 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19954 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19955 RTX_FRAME_RELATED_P (tmp
) = 1;
19956 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19958 /* Save registers. */
19959 offset
= - 4 * num_regs
;
19961 while (j
<= LAST_ARM_REGNUM
)
19962 if (saved_regs_mask
& (1 << j
))
19965 && (saved_regs_mask
& (1 << (j
+ 1))))
19967 /* Current register and previous register form register pair for
19968 which STRD can be generated. */
19971 /* Allocate stack space for all saved registers. */
19972 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19973 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19974 mem
= gen_frame_mem (DImode
, tmp
);
19977 else if (offset
> 0)
19978 mem
= gen_frame_mem (DImode
,
19979 plus_constant (Pmode
,
19983 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19985 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
19986 RTX_FRAME_RELATED_P (tmp
) = 1;
19987 tmp
= emit_insn (tmp
);
19989 /* Record the first store insn. */
19990 if (dwarf_index
== 1)
19993 /* Generate dwarf info. */
19994 mem
= gen_frame_mem (SImode
,
19995 plus_constant (Pmode
,
19998 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19999 RTX_FRAME_RELATED_P (tmp
) = 1;
20000 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20002 mem
= gen_frame_mem (SImode
,
20003 plus_constant (Pmode
,
20006 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20007 RTX_FRAME_RELATED_P (tmp
) = 1;
20008 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20015 /* Emit a single word store. */
20018 /* Allocate stack space for all saved registers. */
20019 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20020 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20021 mem
= gen_frame_mem (SImode
, tmp
);
20024 else if (offset
> 0)
20025 mem
= gen_frame_mem (SImode
,
20026 plus_constant (Pmode
,
20030 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20032 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20033 RTX_FRAME_RELATED_P (tmp
) = 1;
20034 tmp
= emit_insn (tmp
);
20036 /* Record the first store insn. */
20037 if (dwarf_index
== 1)
20040 /* Generate dwarf info. */
20041 mem
= gen_frame_mem (SImode
,
20042 plus_constant(Pmode
,
20045 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20046 RTX_FRAME_RELATED_P (tmp
) = 1;
20047 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20056 /* Attach dwarf info to the first insn we generate. */
20057 gcc_assert (insn
!= NULL_RTX
);
20058 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20059 RTX_FRAME_RELATED_P (insn
) = 1;
20062 /* Generate and emit an insn that we will recognize as a push_multi.
20063 Unfortunately, since this insn does not reflect very well the actual
20064 semantics of the operation, we need to annotate the insn for the benefit
20065 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20066 MASK for registers that should be annotated for DWARF2 frame unwind
20069 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20072 int num_dwarf_regs
= 0;
20076 int dwarf_par_index
;
20079 /* We don't record the PC in the dwarf frame information. */
20080 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20082 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20084 if (mask
& (1 << i
))
20086 if (dwarf_regs_mask
& (1 << i
))
20090 gcc_assert (num_regs
&& num_regs
<= 16);
20091 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20093 /* For the body of the insn we are going to generate an UNSPEC in
20094 parallel with several USEs. This allows the insn to be recognized
20095 by the push_multi pattern in the arm.md file.
20097 The body of the insn looks something like this:
20100 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20101 (const_int:SI <num>)))
20102 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20108 For the frame note however, we try to be more explicit and actually
20109 show each register being stored into the stack frame, plus a (single)
20110 decrement of the stack pointer. We do it this way in order to be
20111 friendly to the stack unwinding code, which only wants to see a single
20112 stack decrement per instruction. The RTL we generate for the note looks
20113 something like this:
20116 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20117 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20118 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20119 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20123 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20124 instead we'd have a parallel expression detailing all
20125 the stores to the various memory addresses so that debug
20126 information is more up-to-date. Remember however while writing
20127 this to take care of the constraints with the push instruction.
20129 Note also that this has to be taken care of for the VFP registers.
20131 For more see PR43399. */
20133 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20134 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20135 dwarf_par_index
= 1;
20137 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20139 if (mask
& (1 << i
))
20141 reg
= gen_rtx_REG (SImode
, i
);
20143 XVECEXP (par
, 0, 0)
20144 = gen_rtx_SET (gen_frame_mem
20146 gen_rtx_PRE_MODIFY (Pmode
,
20149 (Pmode
, stack_pointer_rtx
,
20152 gen_rtx_UNSPEC (BLKmode
,
20153 gen_rtvec (1, reg
),
20154 UNSPEC_PUSH_MULT
));
20156 if (dwarf_regs_mask
& (1 << i
))
20158 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20160 RTX_FRAME_RELATED_P (tmp
) = 1;
20161 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20168 for (j
= 1, i
++; j
< num_regs
; i
++)
20170 if (mask
& (1 << i
))
20172 reg
= gen_rtx_REG (SImode
, i
);
20174 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20176 if (dwarf_regs_mask
& (1 << i
))
20179 = gen_rtx_SET (gen_frame_mem
20181 plus_constant (Pmode
, stack_pointer_rtx
,
20184 RTX_FRAME_RELATED_P (tmp
) = 1;
20185 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20192 par
= emit_insn (par
);
20194 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20195 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20196 RTX_FRAME_RELATED_P (tmp
) = 1;
20197 XVECEXP (dwarf
, 0, 0) = tmp
;
20199 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20204 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20205 SIZE is the offset to be adjusted.
20206 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20208 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20212 RTX_FRAME_RELATED_P (insn
) = 1;
20213 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20214 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20217 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20218 SAVED_REGS_MASK shows which registers need to be restored.
20220 Unfortunately, since this insn does not reflect very well the actual
20221 semantics of the operation, we need to annotate the insn for the benefit
20222 of DWARF2 frame unwind information. */
20224 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20229 rtx dwarf
= NULL_RTX
;
20231 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20235 offset_adj
= return_in_pc
? 1 : 0;
20236 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20237 if (saved_regs_mask
& (1 << i
))
20240 gcc_assert (num_regs
&& num_regs
<= 16);
20242 /* If SP is in reglist, then we don't emit SP update insn. */
20243 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20245 /* The parallel needs to hold num_regs SETs
20246 and one SET for the stack update. */
20247 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20250 XVECEXP (par
, 0, 0) = ret_rtx
;
20254 /* Increment the stack pointer, based on there being
20255 num_regs 4-byte registers to restore. */
20256 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20257 plus_constant (Pmode
,
20260 RTX_FRAME_RELATED_P (tmp
) = 1;
20261 XVECEXP (par
, 0, offset_adj
) = tmp
;
20264 /* Now restore every reg, which may include PC. */
20265 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20266 if (saved_regs_mask
& (1 << i
))
20268 reg
= gen_rtx_REG (SImode
, i
);
20269 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20271 /* Emit single load with writeback. */
20272 tmp
= gen_frame_mem (SImode
,
20273 gen_rtx_POST_INC (Pmode
,
20274 stack_pointer_rtx
));
20275 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20276 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20280 tmp
= gen_rtx_SET (reg
,
20283 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20284 RTX_FRAME_RELATED_P (tmp
) = 1;
20285 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20287 /* We need to maintain a sequence for DWARF info too. As dwarf info
20288 should not have PC, skip PC. */
20289 if (i
!= PC_REGNUM
)
20290 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20296 par
= emit_jump_insn (par
);
20298 par
= emit_insn (par
);
20300 REG_NOTES (par
) = dwarf
;
20302 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20303 stack_pointer_rtx
, stack_pointer_rtx
);
20306 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20307 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20309 Unfortunately, since this insn does not reflect very well the actual
20310 semantics of the operation, we need to annotate the insn for the benefit
20311 of DWARF2 frame unwind information. */
20313 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20317 rtx dwarf
= NULL_RTX
;
20320 gcc_assert (num_regs
&& num_regs
<= 32);
20322 /* Workaround ARM10 VFPr1 bug. */
20323 if (num_regs
== 2 && !arm_arch6
)
20325 if (first_reg
== 15)
20331 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20332 there could be up to 32 D-registers to restore.
20333 If there are more than 16 D-registers, make two recursive calls,
20334 each of which emits one pop_multi instruction. */
20337 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20338 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20342 /* The parallel needs to hold num_regs SETs
20343 and one SET for the stack update. */
20344 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20346 /* Increment the stack pointer, based on there being
20347 num_regs 8-byte registers to restore. */
20348 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20349 RTX_FRAME_RELATED_P (tmp
) = 1;
20350 XVECEXP (par
, 0, 0) = tmp
;
20352 /* Now show every reg that will be restored, using a SET for each. */
20353 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20355 reg
= gen_rtx_REG (DFmode
, i
);
20357 tmp
= gen_rtx_SET (reg
,
20360 plus_constant (Pmode
, base_reg
, 8 * j
)));
20361 RTX_FRAME_RELATED_P (tmp
) = 1;
20362 XVECEXP (par
, 0, j
+ 1) = tmp
;
20364 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20369 par
= emit_insn (par
);
20370 REG_NOTES (par
) = dwarf
;
20372 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20373 if (TARGET_VFP
&& REGNO (base_reg
) == IP_REGNUM
)
20375 RTX_FRAME_RELATED_P (par
) = 1;
20376 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20379 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20380 base_reg
, base_reg
);
20383 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20384 number of registers are being popped, multiple LDRD patterns are created for
20385 all register pairs. If odd number of registers are popped, last register is
20386 loaded by using LDR pattern. */
20388 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20392 rtx par
= NULL_RTX
;
20393 rtx dwarf
= NULL_RTX
;
20394 rtx tmp
, reg
, tmp1
;
20395 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20397 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20398 if (saved_regs_mask
& (1 << i
))
20401 gcc_assert (num_regs
&& num_regs
<= 16);
20403 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20404 to be popped. So, if num_regs is even, now it will become odd,
20405 and we can generate pop with PC. If num_regs is odd, it will be
20406 even now, and ldr with return can be generated for PC. */
20410 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20412 /* Var j iterates over all the registers to gather all the registers in
20413 saved_regs_mask. Var i gives index of saved registers in stack frame.
20414 A PARALLEL RTX of register-pair is created here, so that pattern for
20415 LDRD can be matched. As PC is always last register to be popped, and
20416 we have already decremented num_regs if PC, we don't have to worry
20417 about PC in this loop. */
20418 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20419 if (saved_regs_mask
& (1 << j
))
20421 /* Create RTX for memory load. */
20422 reg
= gen_rtx_REG (SImode
, j
);
20423 tmp
= gen_rtx_SET (reg
,
20424 gen_frame_mem (SImode
,
20425 plus_constant (Pmode
,
20426 stack_pointer_rtx
, 4 * i
)));
20427 RTX_FRAME_RELATED_P (tmp
) = 1;
20431 /* When saved-register index (i) is even, the RTX to be emitted is
20432 yet to be created. Hence create it first. The LDRD pattern we
20433 are generating is :
20434 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20435 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20436 where target registers need not be consecutive. */
20437 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20441 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20442 added as 0th element and if i is odd, reg_i is added as 1st element
20443 of LDRD pattern shown above. */
20444 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20445 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20449 /* When saved-register index (i) is odd, RTXs for both the registers
20450 to be loaded are generated in above given LDRD pattern, and the
20451 pattern can be emitted now. */
20452 par
= emit_insn (par
);
20453 REG_NOTES (par
) = dwarf
;
20454 RTX_FRAME_RELATED_P (par
) = 1;
20460 /* If the number of registers pushed is odd AND return_in_pc is false OR
20461 number of registers are even AND return_in_pc is true, last register is
20462 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20463 then LDR with post increment. */
20465 /* Increment the stack pointer, based on there being
20466 num_regs 4-byte registers to restore. */
20467 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20468 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20469 RTX_FRAME_RELATED_P (tmp
) = 1;
20470 tmp
= emit_insn (tmp
);
20473 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20474 stack_pointer_rtx
, stack_pointer_rtx
);
20479 if (((num_regs
% 2) == 1 && !return_in_pc
)
20480 || ((num_regs
% 2) == 0 && return_in_pc
))
20482 /* Scan for the single register to be popped. Skip until the saved
20483 register is found. */
20484 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20486 /* Gen LDR with post increment here. */
20487 tmp1
= gen_rtx_MEM (SImode
,
20488 gen_rtx_POST_INC (SImode
,
20489 stack_pointer_rtx
));
20490 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20492 reg
= gen_rtx_REG (SImode
, j
);
20493 tmp
= gen_rtx_SET (reg
, tmp1
);
20494 RTX_FRAME_RELATED_P (tmp
) = 1;
20495 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20499 /* If return_in_pc, j must be PC_REGNUM. */
20500 gcc_assert (j
== PC_REGNUM
);
20501 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20502 XVECEXP (par
, 0, 0) = ret_rtx
;
20503 XVECEXP (par
, 0, 1) = tmp
;
20504 par
= emit_jump_insn (par
);
20508 par
= emit_insn (tmp
);
20509 REG_NOTES (par
) = dwarf
;
20510 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20511 stack_pointer_rtx
, stack_pointer_rtx
);
20515 else if ((num_regs
% 2) == 1 && return_in_pc
)
20517 /* There are 2 registers to be popped. So, generate the pattern
20518 pop_multiple_with_stack_update_and_return to pop in PC. */
20519 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20525 /* LDRD in ARM mode needs consecutive registers as operands. This function
20526 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20527 offset addressing and then generates one separate stack udpate. This provides
20528 more scheduling freedom, compared to writeback on every load. However,
20529 if the function returns using load into PC directly
20530 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20531 before the last load. TODO: Add a peephole optimization to recognize
20532 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20533 peephole optimization to merge the load at stack-offset zero
20534 with the stack update instruction using load with writeback
20535 in post-index addressing mode. */
20537 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20541 rtx par
= NULL_RTX
;
20542 rtx dwarf
= NULL_RTX
;
20545 /* Restore saved registers. */
20546 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20548 while (j
<= LAST_ARM_REGNUM
)
20549 if (saved_regs_mask
& (1 << j
))
20552 && (saved_regs_mask
& (1 << (j
+ 1)))
20553 && (j
+ 1) != PC_REGNUM
)
20555 /* Current register and next register form register pair for which
20556 LDRD can be generated. PC is always the last register popped, and
20557 we handle it separately. */
20559 mem
= gen_frame_mem (DImode
,
20560 plus_constant (Pmode
,
20564 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20566 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20567 tmp
= emit_insn (tmp
);
20568 RTX_FRAME_RELATED_P (tmp
) = 1;
20570 /* Generate dwarf info. */
20572 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20573 gen_rtx_REG (SImode
, j
),
20575 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20576 gen_rtx_REG (SImode
, j
+ 1),
20579 REG_NOTES (tmp
) = dwarf
;
20584 else if (j
!= PC_REGNUM
)
20586 /* Emit a single word load. */
20588 mem
= gen_frame_mem (SImode
,
20589 plus_constant (Pmode
,
20593 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20595 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20596 tmp
= emit_insn (tmp
);
20597 RTX_FRAME_RELATED_P (tmp
) = 1;
20599 /* Generate dwarf info. */
20600 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20601 gen_rtx_REG (SImode
, j
),
20607 else /* j == PC_REGNUM */
20613 /* Update the stack. */
20616 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20617 plus_constant (Pmode
,
20620 tmp
= emit_insn (tmp
);
20621 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20622 stack_pointer_rtx
, stack_pointer_rtx
);
20626 if (saved_regs_mask
& (1 << PC_REGNUM
))
20628 /* Only PC is to be popped. */
20629 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20630 XVECEXP (par
, 0, 0) = ret_rtx
;
20631 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20632 gen_frame_mem (SImode
,
20633 gen_rtx_POST_INC (SImode
,
20634 stack_pointer_rtx
)));
20635 RTX_FRAME_RELATED_P (tmp
) = 1;
20636 XVECEXP (par
, 0, 1) = tmp
;
20637 par
= emit_jump_insn (par
);
20639 /* Generate dwarf info. */
20640 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20641 gen_rtx_REG (SImode
, PC_REGNUM
),
20643 REG_NOTES (par
) = dwarf
;
20644 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20645 stack_pointer_rtx
, stack_pointer_rtx
);
20649 /* Calculate the size of the return value that is passed in registers. */
20651 arm_size_return_regs (void)
20655 if (crtl
->return_rtx
!= 0)
20656 mode
= GET_MODE (crtl
->return_rtx
);
20658 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20660 return GET_MODE_SIZE (mode
);
20663 /* Return true if the current function needs to save/restore LR. */
20665 thumb_force_lr_save (void)
20667 return !cfun
->machine
->lr_save_eliminated
20668 && (!leaf_function_p ()
20669 || thumb_far_jump_used_p ()
20670 || df_regs_ever_live_p (LR_REGNUM
));
20673 /* We do not know if r3 will be available because
20674 we do have an indirect tailcall happening in this
20675 particular case. */
20677 is_indirect_tailcall_p (rtx call
)
20679 rtx pat
= PATTERN (call
);
20681 /* Indirect tail call. */
20682 pat
= XVECEXP (pat
, 0, 0);
20683 if (GET_CODE (pat
) == SET
)
20684 pat
= SET_SRC (pat
);
20686 pat
= XEXP (XEXP (pat
, 0), 0);
20687 return REG_P (pat
);
20690 /* Return true if r3 is used by any of the tail call insns in the
20691 current function. */
20693 any_sibcall_could_use_r3 (void)
20698 if (!crtl
->tail_call_emit
)
20700 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20701 if (e
->flags
& EDGE_SIBCALL
)
20703 rtx call
= BB_END (e
->src
);
20704 if (!CALL_P (call
))
20705 call
= prev_nonnote_nondebug_insn (call
);
20706 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20707 if (find_regno_fusage (call
, USE
, 3)
20708 || is_indirect_tailcall_p (call
))
20715 /* Compute the distance from register FROM to register TO.
20716 These can be the arg pointer (26), the soft frame pointer (25),
20717 the stack pointer (13) or the hard frame pointer (11).
20718 In thumb mode r7 is used as the soft frame pointer, if needed.
20719 Typical stack layout looks like this:
20721 old stack pointer -> | |
20724 | | saved arguments for
20725 | | vararg functions
20728 hard FP & arg pointer -> | | \
20736 soft frame pointer -> | | /
20741 locals base pointer -> | | /
20746 current stack pointer -> | | /
20749 For a given function some or all of these stack components
20750 may not be needed, giving rise to the possibility of
20751 eliminating some of the registers.
20753 The values returned by this function must reflect the behavior
20754 of arm_expand_prologue() and arm_compute_save_reg_mask().
20756 The sign of the number returned reflects the direction of stack
20757 growth, so the values are positive for all eliminations except
20758 from the soft frame pointer to the hard frame pointer.
20760 SFP may point just inside the local variables block to ensure correct
20764 /* Calculate stack offsets. These are used to calculate register elimination
20765 offsets and in prologue/epilogue code. Also calculates which registers
20766 should be saved. */
20768 static arm_stack_offsets
*
20769 arm_get_frame_offsets (void)
20771 struct arm_stack_offsets
*offsets
;
20772 unsigned long func_type
;
20776 HOST_WIDE_INT frame_size
;
20779 offsets
= &cfun
->machine
->stack_offsets
;
20781 /* We need to know if we are a leaf function. Unfortunately, it
20782 is possible to be called after start_sequence has been called,
20783 which causes get_insns to return the insns for the sequence,
20784 not the function, which will cause leaf_function_p to return
20785 the incorrect result.
20787 to know about leaf functions once reload has completed, and the
20788 frame size cannot be changed after that time, so we can safely
20789 use the cached value. */
20791 if (reload_completed
)
20794 /* Initially this is the size of the local variables. It will translated
20795 into an offset once we have determined the size of preceding data. */
20796 frame_size
= ROUND_UP_WORD (get_frame_size ());
20798 leaf
= leaf_function_p ();
20800 /* Space for variadic functions. */
20801 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20803 /* In Thumb mode this is incorrect, but never used. */
20805 = (offsets
->saved_args
20806 + arm_compute_static_chain_stack_bytes ()
20807 + (frame_pointer_needed
? 4 : 0));
20811 unsigned int regno
;
20813 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20814 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20815 saved
= core_saved
;
20817 /* We know that SP will be doubleword aligned on entry, and we must
20818 preserve that condition at any subroutine call. We also require the
20819 soft frame pointer to be doubleword aligned. */
20821 if (TARGET_REALLY_IWMMXT
)
20823 /* Check for the call-saved iWMMXt registers. */
20824 for (regno
= FIRST_IWMMXT_REGNUM
;
20825 regno
<= LAST_IWMMXT_REGNUM
;
20827 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20831 func_type
= arm_current_func_type ();
20832 /* Space for saved VFP registers. */
20833 if (! IS_VOLATILE (func_type
)
20834 && TARGET_HARD_FLOAT
&& TARGET_VFP
)
20835 saved
+= arm_get_vfp_saved_size ();
20837 else /* TARGET_THUMB1 */
20839 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20840 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20841 saved
= core_saved
;
20842 if (TARGET_BACKTRACE
)
20846 /* Saved registers include the stack frame. */
20847 offsets
->saved_regs
20848 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20849 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20851 /* A leaf function does not need any stack alignment if it has nothing
20853 if (leaf
&& frame_size
== 0
20854 /* However if it calls alloca(), we have a dynamically allocated
20855 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20856 && ! cfun
->calls_alloca
)
20858 offsets
->outgoing_args
= offsets
->soft_frame
;
20859 offsets
->locals_base
= offsets
->soft_frame
;
20863 /* Ensure SFP has the correct alignment. */
20864 if (ARM_DOUBLEWORD_ALIGN
20865 && (offsets
->soft_frame
& 7))
20867 offsets
->soft_frame
+= 4;
20868 /* Try to align stack by pushing an extra reg. Don't bother doing this
20869 when there is a stack frame as the alignment will be rolled into
20870 the normal stack adjustment. */
20871 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20875 /* Register r3 is caller-saved. Normally it does not need to be
20876 saved on entry by the prologue. However if we choose to save
20877 it for padding then we may confuse the compiler into thinking
20878 a prologue sequence is required when in fact it is not. This
20879 will occur when shrink-wrapping if r3 is used as a scratch
20880 register and there are no other callee-saved writes.
20882 This situation can be avoided when other callee-saved registers
20883 are available and r3 is not mandatory if we choose a callee-saved
20884 register for padding. */
20885 bool prefer_callee_reg_p
= false;
20887 /* If it is safe to use r3, then do so. This sometimes
20888 generates better code on Thumb-2 by avoiding the need to
20889 use 32-bit push/pop instructions. */
20890 if (! any_sibcall_could_use_r3 ()
20891 && arm_size_return_regs () <= 12
20892 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20894 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20897 if (!TARGET_THUMB2
)
20898 prefer_callee_reg_p
= true;
20901 || prefer_callee_reg_p
)
20903 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20905 /* Avoid fixed registers; they may be changed at
20906 arbitrary times so it's unsafe to restore them
20907 during the epilogue. */
20909 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20919 offsets
->saved_regs
+= 4;
20920 offsets
->saved_regs_mask
|= (1 << reg
);
20925 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20926 offsets
->outgoing_args
= (offsets
->locals_base
20927 + crtl
->outgoing_args_size
);
20929 if (ARM_DOUBLEWORD_ALIGN
)
20931 /* Ensure SP remains doubleword aligned. */
20932 if (offsets
->outgoing_args
& 7)
20933 offsets
->outgoing_args
+= 4;
20934 gcc_assert (!(offsets
->outgoing_args
& 7));
20941 /* Calculate the relative offsets for the different stack pointers. Positive
20942 offsets are in the direction of stack growth. */
20945 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20947 arm_stack_offsets
*offsets
;
20949 offsets
= arm_get_frame_offsets ();
20951 /* OK, now we have enough information to compute the distances.
20952 There must be an entry in these switch tables for each pair
20953 of registers in ELIMINABLE_REGS, even if some of the entries
20954 seem to be redundant or useless. */
20957 case ARG_POINTER_REGNUM
:
20960 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20963 case FRAME_POINTER_REGNUM
:
20964 /* This is the reverse of the soft frame pointer
20965 to hard frame pointer elimination below. */
20966 return offsets
->soft_frame
- offsets
->saved_args
;
20968 case ARM_HARD_FRAME_POINTER_REGNUM
:
20969 /* This is only non-zero in the case where the static chain register
20970 is stored above the frame. */
20971 return offsets
->frame
- offsets
->saved_args
- 4;
20973 case STACK_POINTER_REGNUM
:
20974 /* If nothing has been pushed on the stack at all
20975 then this will return -4. This *is* correct! */
20976 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20979 gcc_unreachable ();
20981 gcc_unreachable ();
20983 case FRAME_POINTER_REGNUM
:
20986 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20989 case ARM_HARD_FRAME_POINTER_REGNUM
:
20990 /* The hard frame pointer points to the top entry in the
20991 stack frame. The soft frame pointer to the bottom entry
20992 in the stack frame. If there is no stack frame at all,
20993 then they are identical. */
20995 return offsets
->frame
- offsets
->soft_frame
;
20997 case STACK_POINTER_REGNUM
:
20998 return offsets
->outgoing_args
- offsets
->soft_frame
;
21001 gcc_unreachable ();
21003 gcc_unreachable ();
21006 /* You cannot eliminate from the stack pointer.
21007 In theory you could eliminate from the hard frame
21008 pointer to the stack pointer, but this will never
21009 happen, since if a stack frame is not needed the
21010 hard frame pointer will never be used. */
21011 gcc_unreachable ();
21015 /* Given FROM and TO register numbers, say whether this elimination is
21016 allowed. Frame pointer elimination is automatically handled.
21018 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21019 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21020 pointer, we must eliminate FRAME_POINTER_REGNUM into
21021 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21022 ARG_POINTER_REGNUM. */
21025 arm_can_eliminate (const int from
, const int to
)
21027 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21028 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21029 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21030 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21034 /* Emit RTL to save coprocessor registers on function entry. Returns the
21035 number of bytes pushed. */
21038 arm_save_coproc_regs(void)
21040 int saved_size
= 0;
21042 unsigned start_reg
;
21045 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21046 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21048 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21049 insn
= gen_rtx_MEM (V2SImode
, insn
);
21050 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21051 RTX_FRAME_RELATED_P (insn
) = 1;
21055 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
21057 start_reg
= FIRST_VFP_REGNUM
;
21059 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21061 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21062 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21064 if (start_reg
!= reg
)
21065 saved_size
+= vfp_emit_fstmd (start_reg
,
21066 (reg
- start_reg
) / 2);
21067 start_reg
= reg
+ 2;
21070 if (start_reg
!= reg
)
21071 saved_size
+= vfp_emit_fstmd (start_reg
,
21072 (reg
- start_reg
) / 2);
21078 /* Set the Thumb frame pointer from the stack pointer. */
21081 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21083 HOST_WIDE_INT amount
;
21086 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21088 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21089 stack_pointer_rtx
, GEN_INT (amount
)));
21092 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21093 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21094 expects the first two operands to be the same. */
21097 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21099 hard_frame_pointer_rtx
));
21103 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21104 hard_frame_pointer_rtx
,
21105 stack_pointer_rtx
));
21107 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21108 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21109 RTX_FRAME_RELATED_P (dwarf
) = 1;
21110 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21113 RTX_FRAME_RELATED_P (insn
) = 1;
21116 struct scratch_reg
{
21121 /* Return a short-lived scratch register for use as a 2nd scratch register on
21122 function entry after the registers are saved in the prologue. This register
21123 must be released by means of release_scratch_register_on_entry. IP is not
21124 considered since it is always used as the 1st scratch register if available.
21126 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21127 mask of live registers. */
21130 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21131 unsigned long live_regs
)
21137 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21143 for (i
= 4; i
< 11; i
++)
21144 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21152 /* If IP is used as the 1st scratch register for a nested function,
21153 then either r3 wasn't available or is used to preserve IP. */
21154 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21156 regno
= (regno1
== 3 ? 2 : 3);
21158 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21163 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21166 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21167 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21168 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21169 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21170 RTX_FRAME_RELATED_P (insn
) = 1;
21171 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21175 /* Release a scratch register obtained from the preceding function. */
21178 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21182 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21183 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21184 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21185 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21186 RTX_FRAME_RELATED_P (insn
) = 1;
21187 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
/* Distance between successive stack-check probes; the str offset used
   for probing is limited to 4095 on ARM, hence the sanity check.  */
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif
21197 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21198 inclusive. These are offsets from the current stack pointer. REGNO1
21199 is the index number of the 1st scratch register and LIVE_REGS is the
21200 mask of live registers. */
21203 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21204 unsigned int regno1
, unsigned long live_regs
)
21206 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21208 /* See if we have a constant small number of probes to generate. If so,
21209 that's the easy case. */
21210 if (size
<= PROBE_INTERVAL
)
21212 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21213 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21214 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21217 /* The run-time loop is made up of 10 insns in the generic case while the
21218 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21219 else if (size
<= 5 * PROBE_INTERVAL
)
21221 HOST_WIDE_INT i
, rem
;
21223 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21224 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21225 emit_stack_probe (reg1
);
21227 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21228 it exceeds SIZE. If only two probes are needed, this will not
21229 generate any code. Then probe at FIRST + SIZE. */
21230 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21232 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21233 emit_stack_probe (reg1
);
21236 rem
= size
- (i
- PROBE_INTERVAL
);
21237 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21239 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21240 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21243 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21246 /* Otherwise, do the same as above, but in a loop. Note that we must be
21247 extra careful with variables wrapping around because we might be at
21248 the very top (or the very bottom) of the address space and we have
21249 to be able to handle this case properly; in particular, we use an
21250 equality test for the loop condition. */
21253 HOST_WIDE_INT rounded_size
;
21254 struct scratch_reg sr
;
21256 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21258 emit_move_insn (reg1
, GEN_INT (first
));
21261 /* Step 1: round SIZE to the previous multiple of the interval. */
21263 rounded_size
= size
& -PROBE_INTERVAL
;
21264 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21267 /* Step 2: compute initial and final value of the loop counter. */
21269 /* TEST_ADDR = SP + FIRST. */
21270 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21272 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21273 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21276 /* Step 3: the loop
21280 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21283 while (TEST_ADDR != LAST_ADDR)
21285 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21286 until it is equal to ROUNDED_SIZE. */
21288 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21291 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21292 that SIZE is equal to ROUNDED_SIZE. */
21294 if (size
!= rounded_size
)
21296 HOST_WIDE_INT rem
= size
- rounded_size
;
21298 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21300 emit_set_insn (sr
.reg
,
21301 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21302 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21303 PROBE_INTERVAL
- rem
));
21306 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21309 release_scratch_register_on_entry (&sr
);
21312 /* Make sure nothing is scheduled before we are done. */
21313 emit_insn (gen_blockage ());
21316 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21317 absolute addresses. */
21320 output_probe_stack_range (rtx reg1
, rtx reg2
)
21322 static int labelno
= 0;
21326 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21329 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21331 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21333 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21334 output_asm_insn ("sub\t%0, %0, %1", xops
);
21336 /* Probe at TEST_ADDR. */
21337 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21339 /* Test if TEST_ADDR == LAST_ADDR. */
21341 output_asm_insn ("cmp\t%0, %1", xops
);
21344 fputs ("\tbne\t", asm_out_file
);
21345 assemble_name_raw (asm_out_file
, loop_lab
);
21346 fputc ('\n', asm_out_file
);
21351 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21354 arm_expand_prologue (void)
21359 unsigned long live_regs_mask
;
21360 unsigned long func_type
;
21362 int saved_pretend_args
= 0;
21363 int saved_regs
= 0;
21364 unsigned HOST_WIDE_INT args_to_push
;
21365 HOST_WIDE_INT size
;
21366 arm_stack_offsets
*offsets
;
21369 func_type
= arm_current_func_type ();
21371 /* Naked functions don't have prologues. */
21372 if (IS_NAKED (func_type
))
21375 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21376 args_to_push
= crtl
->args
.pretend_args_size
;
21378 /* Compute which register we will have to save onto the stack. */
21379 offsets
= arm_get_frame_offsets ();
21380 live_regs_mask
= offsets
->saved_regs_mask
;
21382 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21384 if (IS_STACKALIGN (func_type
))
21388 /* Handle a word-aligned stack pointer. We generate the following:
21393 <save and restore r0 in normal prologue/epilogue>
21397 The unwinder doesn't need to know about the stack realignment.
21398 Just tell it we saved SP in r0. */
21399 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21401 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21402 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21404 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21405 RTX_FRAME_RELATED_P (insn
) = 1;
21406 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21408 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21410 /* ??? The CFA changes here, which may cause GDB to conclude that it
21411 has entered a different function. That said, the unwind info is
21412 correct, individually, before and after this instruction because
21413 we've described the save of SP, which will override the default
21414 handling of SP as restoring from the CFA. */
21415 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21418 /* The static chain register is the same as the IP register. If it is
21419 clobbered when creating the frame, we need to save and restore it. */
21420 clobber_ip
= IS_NESTED (func_type
)
21421 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21422 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21423 && !df_regs_ever_live_p (LR_REGNUM
)
21424 && arm_r3_live_at_start_p ()));
21426 /* Find somewhere to store IP whilst the frame is being created.
21427 We try the following places in order:
21429 1. The last argument register r3 if it is available.
21430 2. A slot on the stack above the frame if there are no
21431 arguments to push onto the stack.
21432 3. Register r3 again, after pushing the argument registers
21433 onto the stack, if this is a varargs function.
21434 4. The last slot on the stack created for the arguments to
21435 push, if this isn't a varargs function.
21437 Note - we only need to tell the dwarf2 backend about the SP
21438 adjustment in the second variant; the static chain register
21439 doesn't need to be unwound, as it doesn't contain a value
21440 inherited from the caller. */
21443 if (!arm_r3_live_at_start_p ())
21444 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21445 else if (args_to_push
== 0)
21449 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21452 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21453 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21456 /* Just tell the dwarf backend that we adjusted SP. */
21457 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21458 plus_constant (Pmode
, stack_pointer_rtx
,
21460 RTX_FRAME_RELATED_P (insn
) = 1;
21461 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21465 /* Store the args on the stack. */
21466 if (cfun
->machine
->uses_anonymous_args
)
21468 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21469 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21470 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21471 saved_pretend_args
= 1;
21477 if (args_to_push
== 4)
21478 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21480 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21481 plus_constant (Pmode
,
21485 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21487 /* Just tell the dwarf backend that we adjusted SP. */
21488 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21489 plus_constant (Pmode
, stack_pointer_rtx
,
21491 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21494 RTX_FRAME_RELATED_P (insn
) = 1;
21495 fp_offset
= args_to_push
;
21500 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21502 if (IS_INTERRUPT (func_type
))
21504 /* Interrupt functions must not corrupt any registers.
21505 Creating a frame pointer however, corrupts the IP
21506 register, so we must push it first. */
21507 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21509 /* Do not set RTX_FRAME_RELATED_P on this insn.
21510 The dwarf stack unwinding code only wants to see one
21511 stack decrement per function, and this is not it. If
21512 this instruction is labeled as being part of the frame
21513 creation sequence then dwarf2out_frame_debug_expr will
21514 die when it encounters the assignment of IP to FP
21515 later on, since the use of SP here establishes SP as
21516 the CFA register and not IP.
21518 Anyway this instruction is not really part of the stack
21519 frame creation although it is part of the prologue. */
21522 insn
= emit_set_insn (ip_rtx
,
21523 plus_constant (Pmode
, stack_pointer_rtx
,
21525 RTX_FRAME_RELATED_P (insn
) = 1;
21530 /* Push the argument registers, or reserve space for them. */
21531 if (cfun
->machine
->uses_anonymous_args
)
21532 insn
= emit_multi_reg_push
21533 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21534 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21537 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21538 GEN_INT (- args_to_push
)));
21539 RTX_FRAME_RELATED_P (insn
) = 1;
21542 /* If this is an interrupt service routine, and the link register
21543 is going to be pushed, and we're not generating extra
21544 push of IP (needed when frame is needed and frame layout if apcs),
21545 subtracting four from LR now will mean that the function return
21546 can be done with a single instruction. */
21547 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21548 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21549 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21552 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21554 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21557 if (live_regs_mask
)
21559 unsigned long dwarf_regs_mask
= live_regs_mask
;
21561 saved_regs
+= bit_count (live_regs_mask
) * 4;
21562 if (optimize_size
&& !frame_pointer_needed
21563 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21565 /* If no coprocessor registers are being pushed and we don't have
21566 to worry about a frame pointer then push extra registers to
21567 create the stack frame. This is done is a way that does not
21568 alter the frame layout, so is independent of the epilogue. */
21572 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21574 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21575 if (frame
&& n
* 4 >= frame
)
21578 live_regs_mask
|= (1 << n
) - 1;
21579 saved_regs
+= frame
;
21584 && current_tune
->prefer_ldrd_strd
21585 && !optimize_function_for_size_p (cfun
))
21587 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21589 thumb2_emit_strd_push (live_regs_mask
);
21590 else if (TARGET_ARM
21591 && !TARGET_APCS_FRAME
21592 && !IS_INTERRUPT (func_type
))
21593 arm_emit_strd_push (live_regs_mask
);
21596 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21597 RTX_FRAME_RELATED_P (insn
) = 1;
21602 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21603 RTX_FRAME_RELATED_P (insn
) = 1;
21607 if (! IS_VOLATILE (func_type
))
21608 saved_regs
+= arm_save_coproc_regs ();
21610 if (frame_pointer_needed
&& TARGET_ARM
)
21612 /* Create the new frame pointer. */
21613 if (TARGET_APCS_FRAME
)
21615 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21616 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21617 RTX_FRAME_RELATED_P (insn
) = 1;
21621 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21622 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21623 stack_pointer_rtx
, insn
));
21624 RTX_FRAME_RELATED_P (insn
) = 1;
21628 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21629 if (flag_stack_usage_info
)
21630 current_function_static_stack_size
= size
;
21632 /* If this isn't an interrupt service routine and we have a frame, then do
21633 stack checking. We use IP as the first scratch register, except for the
21634 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21635 if (!IS_INTERRUPT (func_type
)
21636 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
21638 unsigned int regno
;
21640 if (!IS_NESTED (func_type
) || clobber_ip
)
21642 else if (df_regs_ever_live_p (LR_REGNUM
))
21647 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21649 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
21650 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
21651 size
- STACK_CHECK_PROTECT
,
21652 regno
, live_regs_mask
);
21655 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
21656 regno
, live_regs_mask
);
21659 /* Recover the static chain register. */
21662 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21663 insn
= gen_rtx_REG (SImode
, 3);
21666 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21667 insn
= gen_frame_mem (SImode
, insn
);
21669 emit_set_insn (ip_rtx
, insn
);
21670 emit_insn (gen_force_register_use (ip_rtx
));
21673 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21675 /* This add can produce multiple insns for a large constant, so we
21676 need to get tricky. */
21677 rtx_insn
*last
= get_last_insn ();
21679 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21680 - offsets
->outgoing_args
);
21682 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21686 last
= last
? NEXT_INSN (last
) : get_insns ();
21687 RTX_FRAME_RELATED_P (last
) = 1;
21689 while (last
!= insn
);
21691 /* If the frame pointer is needed, emit a special barrier that
21692 will prevent the scheduler from moving stores to the frame
21693 before the stack adjustment. */
21694 if (frame_pointer_needed
)
21695 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21696 hard_frame_pointer_rtx
));
21700 if (frame_pointer_needed
&& TARGET_THUMB2
)
21701 thumb_set_frame_pointer (offsets
);
21703 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21705 unsigned long mask
;
21707 mask
= live_regs_mask
;
21708 mask
&= THUMB2_WORK_REGS
;
21709 if (!IS_NESTED (func_type
))
21710 mask
|= (1 << IP_REGNUM
);
21711 arm_load_pic_register (mask
);
21714 /* If we are profiling, make sure no instructions are scheduled before
21715 the call to mcount. Similarly if the user has requested no
21716 scheduling in the prolog. Similarly if we want non-call exceptions
21717 using the EABI unwinder, to prevent faulting instructions from being
21718 swapped with a stack adjustment. */
21719 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21720 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21721 && cfun
->can_throw_non_call_exceptions
))
21722 emit_insn (gen_blockage ());
21724 /* If the link register is being kept alive, with the return address in it,
21725 then make sure that it does not get reused by the ce2 pass. */
21726 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21727 cfun
->machine
->lr_save_eliminated
= 1;
21730 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21732 arm_print_condition (FILE *stream
)
21734 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21736 /* Branch conversion is not implemented for Thumb-2. */
21739 output_operand_lossage ("predicated Thumb instruction");
21742 if (current_insn_predicate
!= NULL
)
21744 output_operand_lossage
21745 ("predicated instruction in conditional sequence");
21749 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21751 else if (current_insn_predicate
)
21753 enum arm_cond_code code
;
21757 output_operand_lossage ("predicated Thumb instruction");
21761 code
= get_arm_condition_code (current_insn_predicate
);
21762 fputs (arm_condition_codes
[code
], stream
);
21767 /* Globally reserved letters: acln
21768 Puncutation letters currently used: @_|?().!#
21769 Lower case letters currently used: bcdefhimpqtvwxyz
21770 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21771 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21773 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21775 If CODE is 'd', then the X is a condition operand and the instruction
21776 should only be executed if the condition is true.
21777 if CODE is 'D', then the X is a condition operand and the instruction
21778 should only be executed if the condition is false: however, if the mode
21779 of the comparison is CCFPEmode, then always execute the instruction -- we
21780 do this because in these circumstances !GE does not necessarily imply LT;
21781 in these cases the instruction pattern will take care to make sure that
21782 an instruction containing %d will follow, thereby undoing the effects of
21783 doing this instruction unconditionally.
21784 If CODE is 'N' then X is a floating point operand that must be negated
21786 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21787 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21789 arm_print_operand (FILE *stream
, rtx x
, int code
)
21794 fputs (ASM_COMMENT_START
, stream
);
21798 fputs (user_label_prefix
, stream
);
21802 fputs (REGISTER_PREFIX
, stream
);
21806 arm_print_condition (stream
);
21810 /* Nothing in unified syntax, otherwise the current condition code. */
21811 if (!TARGET_UNIFIED_ASM
)
21812 arm_print_condition (stream
);
21816 /* The current condition code in unified syntax, otherwise nothing. */
21817 if (TARGET_UNIFIED_ASM
)
21818 arm_print_condition (stream
);
21822 /* The current condition code for a condition code setting instruction.
21823 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21824 if (TARGET_UNIFIED_ASM
)
21826 fputc('s', stream
);
21827 arm_print_condition (stream
);
21831 arm_print_condition (stream
);
21832 fputc('s', stream
);
21837 /* If the instruction is conditionally executed then print
21838 the current condition code, otherwise print 's'. */
21839 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
21840 if (current_insn_predicate
)
21841 arm_print_condition (stream
);
21843 fputc('s', stream
);
21846 /* %# is a "break" sequence. It doesn't output anything, but is used to
21847 separate e.g. operand numbers from following text, if that text consists
21848 of further digits which we don't want to be part of the operand
21856 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
21857 fprintf (stream
, "%s", fp_const_from_val (&r
));
21861 /* An integer or symbol address without a preceding # sign. */
21863 switch (GET_CODE (x
))
21866 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21870 output_addr_const (stream
, x
);
21874 if (GET_CODE (XEXP (x
, 0)) == PLUS
21875 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21877 output_addr_const (stream
, x
);
21880 /* Fall through. */
21883 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21887 /* An integer that we want to print in HEX. */
21889 switch (GET_CODE (x
))
21892 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21896 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21901 if (CONST_INT_P (x
))
21904 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21905 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21909 putc ('~', stream
);
21910 output_addr_const (stream
, x
);
21915 /* Print the log2 of a CONST_INT. */
21919 if (!CONST_INT_P (x
)
21920 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21921 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21923 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21928 /* The low 16 bits of an immediate constant. */
21929 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21933 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21937 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21945 shift
= shift_op (x
, &val
);
21949 fprintf (stream
, ", %s ", shift
);
21951 arm_print_operand (stream
, XEXP (x
, 1), 0);
21953 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21958 /* An explanation of the 'Q', 'R' and 'H' register operands:
21960 In a pair of registers containing a DI or DF value the 'Q'
21961 operand returns the register number of the register containing
21962 the least significant part of the value. The 'R' operand returns
21963 the register number of the register containing the most
21964 significant part of the value.
21966 The 'H' operand returns the higher of the two register numbers.
21967 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21968 same as the 'Q' operand, since the most significant part of the
21969 value is held in the lower number register. The reverse is true
21970 on systems where WORDS_BIG_ENDIAN is false.
21972 The purpose of these operands is to distinguish between cases
21973 where the endian-ness of the values is important (for example
21974 when they are added together), and cases where the endian-ness
21975 is irrelevant, but the order of register operations is important.
21976 For example when loading a value from memory into a register
21977 pair, the endian-ness does not matter. Provided that the value
21978 from the lower memory address is put into the lower numbered
21979 register, and the value from the higher address is put into the
21980 higher numbered register, the load will work regardless of whether
21981 the value being loaded is big-wordian or little-wordian. The
21982 order of the two register loads can matter however, if the address
21983 of the memory location is actually held in one of the registers
21984 being overwritten by the load.
21986 The 'Q' and 'R' constraints are also available for 64-bit
21989 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21991 rtx part
= gen_lowpart (SImode
, x
);
21992 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21996 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21998 output_operand_lossage ("invalid operand for code '%c'", code
);
22002 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
22006 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22008 machine_mode mode
= GET_MODE (x
);
22011 if (mode
== VOIDmode
)
22013 part
= gen_highpart_mode (SImode
, mode
, x
);
22014 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22018 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22020 output_operand_lossage ("invalid operand for code '%c'", code
);
22024 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
22028 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22030 output_operand_lossage ("invalid operand for code '%c'", code
);
22034 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22038 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22040 output_operand_lossage ("invalid operand for code '%c'", code
);
22044 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22048 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22050 output_operand_lossage ("invalid operand for code '%c'", code
);
22054 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
22058 asm_fprintf (stream
, "%r",
22059 REG_P (XEXP (x
, 0))
22060 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
22064 asm_fprintf (stream
, "{%r-%r}",
22066 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22069 /* Like 'M', but writing doubleword vector registers, for use by Neon
22073 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22074 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22076 asm_fprintf (stream
, "{d%d}", regno
);
22078 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
22083 /* CONST_TRUE_RTX means always -- that's the default. */
22084 if (x
== const_true_rtx
)
22087 if (!COMPARISON_P (x
))
22089 output_operand_lossage ("invalid operand for code '%c'", code
);
22093 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22098 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22099 want to do that. */
22100 if (x
== const_true_rtx
)
22102 output_operand_lossage ("instruction never executed");
22105 if (!COMPARISON_P (x
))
22107 output_operand_lossage ("invalid operand for code '%c'", code
);
22111 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22112 (get_arm_condition_code (x
))],
22122 /* Former Maverick support, removed after GCC-4.7. */
22123 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22128 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22129 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22130 /* Bad value for wCG register number. */
22132 output_operand_lossage ("invalid operand for code '%c'", code
);
22137 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22140 /* Print an iWMMXt control register name. */
22142 if (!CONST_INT_P (x
)
22144 || INTVAL (x
) >= 16)
22145 /* Bad value for wC register number. */
22147 output_operand_lossage ("invalid operand for code '%c'", code
);
22153 static const char * wc_reg_names
[16] =
22155 "wCID", "wCon", "wCSSF", "wCASF",
22156 "wC4", "wC5", "wC6", "wC7",
22157 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22158 "wC12", "wC13", "wC14", "wC15"
22161 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22165 /* Print the high single-precision register of a VFP double-precision
22169 machine_mode mode
= GET_MODE (x
);
22172 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22174 output_operand_lossage ("invalid operand for code '%c'", code
);
22179 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22181 output_operand_lossage ("invalid operand for code '%c'", code
);
22185 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22189 /* Print a VFP/Neon double precision or quad precision register name. */
22193 machine_mode mode
= GET_MODE (x
);
22194 int is_quad
= (code
== 'q');
22197 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22199 output_operand_lossage ("invalid operand for code '%c'", code
);
22204 || !IS_VFP_REGNUM (REGNO (x
)))
22206 output_operand_lossage ("invalid operand for code '%c'", code
);
22211 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22212 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22214 output_operand_lossage ("invalid operand for code '%c'", code
);
22218 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22219 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22223 /* These two codes print the low/high doubleword register of a Neon quad
22224 register, respectively. For pair-structure types, can also print
22225 low/high quadword registers. */
22229 machine_mode mode
= GET_MODE (x
);
22232 if ((GET_MODE_SIZE (mode
) != 16
22233 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22235 output_operand_lossage ("invalid operand for code '%c'", code
);
22240 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22242 output_operand_lossage ("invalid operand for code '%c'", code
);
22246 if (GET_MODE_SIZE (mode
) == 16)
22247 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22248 + (code
== 'f' ? 1 : 0));
22250 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22251 + (code
== 'f' ? 1 : 0));
22255 /* Print a VFPv3 floating-point constant, represented as an integer
22259 int index
= vfp3_const_double_index (x
);
22260 gcc_assert (index
!= -1);
22261 fprintf (stream
, "%d", index
);
22265 /* Print bits representing opcode features for Neon.
22267 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22268 and polynomials as unsigned.
22270 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22272 Bit 2 is 1 for rounding functions, 0 otherwise. */
22274 /* Identify the type as 's', 'u', 'p' or 'f'. */
22277 HOST_WIDE_INT bits
= INTVAL (x
);
22278 fputc ("uspf"[bits
& 3], stream
);
22282 /* Likewise, but signed and unsigned integers are both 'i'. */
22285 HOST_WIDE_INT bits
= INTVAL (x
);
22286 fputc ("iipf"[bits
& 3], stream
);
22290 /* As for 'T', but emit 'u' instead of 'p'. */
22293 HOST_WIDE_INT bits
= INTVAL (x
);
22294 fputc ("usuf"[bits
& 3], stream
);
22298 /* Bit 2: rounding (vs none). */
22301 HOST_WIDE_INT bits
= INTVAL (x
);
22302 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22306 /* Memory operand for vld1/vst1 instruction. */
22310 bool postinc
= FALSE
;
22311 rtx postinc_reg
= NULL
;
22312 unsigned align
, memsize
, align_bits
;
22314 gcc_assert (MEM_P (x
));
22315 addr
= XEXP (x
, 0);
22316 if (GET_CODE (addr
) == POST_INC
)
22319 addr
= XEXP (addr
, 0);
22321 if (GET_CODE (addr
) == POST_MODIFY
)
22323 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22324 addr
= XEXP (addr
, 0);
22326 asm_fprintf (stream
, "[%r", REGNO (addr
));
22328 /* We know the alignment of this access, so we can emit a hint in the
22329 instruction (for some alignments) as an aid to the memory subsystem
22331 align
= MEM_ALIGN (x
) >> 3;
22332 memsize
= MEM_SIZE (x
);
22334 /* Only certain alignment specifiers are supported by the hardware. */
22335 if (memsize
== 32 && (align
% 32) == 0)
22337 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22339 else if (memsize
>= 8 && (align
% 8) == 0)
22344 if (align_bits
!= 0)
22345 asm_fprintf (stream
, ":%d", align_bits
);
22347 asm_fprintf (stream
, "]");
22350 fputs("!", stream
);
22352 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22360 gcc_assert (MEM_P (x
));
22361 addr
= XEXP (x
, 0);
22362 gcc_assert (REG_P (addr
));
22363 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22367 /* Translate an S register number into a D register number and element index. */
22370 machine_mode mode
= GET_MODE (x
);
22373 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22375 output_operand_lossage ("invalid operand for code '%c'", code
);
22380 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22382 output_operand_lossage ("invalid operand for code '%c'", code
);
22386 regno
= regno
- FIRST_VFP_REGNUM
;
22387 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22392 gcc_assert (CONST_DOUBLE_P (x
));
22394 result
= vfp3_const_double_for_fract_bits (x
);
22396 result
= vfp3_const_double_for_bits (x
);
22397 fprintf (stream
, "#%d", result
);
22400 /* Register specifier for vld1.16/vst1.16. Translate the S register
22401 number into a D register number and element index. */
22404 machine_mode mode
= GET_MODE (x
);
22407 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22409 output_operand_lossage ("invalid operand for code '%c'", code
);
22414 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22416 output_operand_lossage ("invalid operand for code '%c'", code
);
22420 regno
= regno
- FIRST_VFP_REGNUM
;
22421 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22428 output_operand_lossage ("missing operand");
22432 switch (GET_CODE (x
))
22435 asm_fprintf (stream
, "%r", REGNO (x
));
22439 output_memory_reference_mode
= GET_MODE (x
);
22440 output_address (XEXP (x
, 0));
22446 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22447 sizeof (fpstr
), 0, 1);
22448 fprintf (stream
, "#%s", fpstr
);
22453 gcc_assert (GET_CODE (x
) != NEG
);
22454 fputc ('#', stream
);
22455 if (GET_CODE (x
) == HIGH
)
22457 fputs (":lower16:", stream
);
22461 output_addr_const (stream
, x
);
22467 /* Target hook for printing a memory address. */
22469 arm_print_operand_address (FILE *stream
, rtx x
)
22473 int is_minus
= GET_CODE (x
) == MINUS
;
22476 asm_fprintf (stream
, "[%r]", REGNO (x
));
22477 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22479 rtx base
= XEXP (x
, 0);
22480 rtx index
= XEXP (x
, 1);
22481 HOST_WIDE_INT offset
= 0;
22483 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22485 /* Ensure that BASE is a register. */
22486 /* (one of them must be). */
22487 /* Also ensure the SP is not used as in index register. */
22488 std::swap (base
, index
);
22490 switch (GET_CODE (index
))
22493 offset
= INTVAL (index
);
22496 asm_fprintf (stream
, "[%r, #%wd]",
22497 REGNO (base
), offset
);
22501 asm_fprintf (stream
, "[%r, %s%r]",
22502 REGNO (base
), is_minus
? "-" : "",
22512 asm_fprintf (stream
, "[%r, %s%r",
22513 REGNO (base
), is_minus
? "-" : "",
22514 REGNO (XEXP (index
, 0)));
22515 arm_print_operand (stream
, index
, 'S');
22516 fputs ("]", stream
);
22521 gcc_unreachable ();
22524 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22525 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22527 extern machine_mode output_memory_reference_mode
;
22529 gcc_assert (REG_P (XEXP (x
, 0)));
22531 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22532 asm_fprintf (stream
, "[%r, #%s%d]!",
22533 REGNO (XEXP (x
, 0)),
22534 GET_CODE (x
) == PRE_DEC
? "-" : "",
22535 GET_MODE_SIZE (output_memory_reference_mode
));
22537 asm_fprintf (stream
, "[%r], #%s%d",
22538 REGNO (XEXP (x
, 0)),
22539 GET_CODE (x
) == POST_DEC
? "-" : "",
22540 GET_MODE_SIZE (output_memory_reference_mode
));
22542 else if (GET_CODE (x
) == PRE_MODIFY
)
22544 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22545 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22546 asm_fprintf (stream
, "#%wd]!",
22547 INTVAL (XEXP (XEXP (x
, 1), 1)));
22549 asm_fprintf (stream
, "%r]!",
22550 REGNO (XEXP (XEXP (x
, 1), 1)));
22552 else if (GET_CODE (x
) == POST_MODIFY
)
22554 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22555 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22556 asm_fprintf (stream
, "#%wd",
22557 INTVAL (XEXP (XEXP (x
, 1), 1)));
22559 asm_fprintf (stream
, "%r",
22560 REGNO (XEXP (XEXP (x
, 1), 1)));
22562 else output_addr_const (stream
, x
);
22567 asm_fprintf (stream
, "[%r]", REGNO (x
));
22568 else if (GET_CODE (x
) == POST_INC
)
22569 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22570 else if (GET_CODE (x
) == PLUS
)
22572 gcc_assert (REG_P (XEXP (x
, 0)));
22573 if (CONST_INT_P (XEXP (x
, 1)))
22574 asm_fprintf (stream
, "[%r, #%wd]",
22575 REGNO (XEXP (x
, 0)),
22576 INTVAL (XEXP (x
, 1)));
22578 asm_fprintf (stream
, "[%r, %r]",
22579 REGNO (XEXP (x
, 0)),
22580 REGNO (XEXP (x
, 1)));
22583 output_addr_const (stream
, x
);
22587 /* Target hook for indicating whether a punctuation character for
22588 TARGET_PRINT_OPERAND is valid. */
22590 arm_print_operand_punct_valid_p (unsigned char code
)
22592 return (code
== '@' || code
== '|' || code
== '.'
22593 || code
== '(' || code
== ')' || code
== '#'
22594 || (TARGET_32BIT
&& (code
== '?'))
22595 || (TARGET_THUMB2
&& (code
== '!'))
22596 || (TARGET_THUMB
&& (code
== '_')));
22599 /* Target hook for assembling integer objects. The ARM version needs to
22600 handle word-sized values specially. */
22602 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22606 if (size
== UNITS_PER_WORD
&& aligned_p
)
22608 fputs ("\t.word\t", asm_out_file
);
22609 output_addr_const (asm_out_file
, x
);
22611 /* Mark symbols as position independent. We only do this in the
22612 .text segment, not in the .data segment. */
22613 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22614 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22616 /* See legitimize_pic_address for an explanation of the
22617 TARGET_VXWORKS_RTP check. */
22618 if (!arm_pic_data_is_text_relative
22619 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
22620 fputs ("(GOT)", asm_out_file
);
22622 fputs ("(GOTOFF)", asm_out_file
);
22624 fputc ('\n', asm_out_file
);
22628 mode
= GET_MODE (x
);
22630 if (arm_vector_mode_supported_p (mode
))
22634 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22636 units
= CONST_VECTOR_NUNITS (x
);
22637 size
= GET_MODE_UNIT_SIZE (mode
);
22639 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22640 for (i
= 0; i
< units
; i
++)
22642 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22644 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22647 for (i
= 0; i
< units
; i
++)
22649 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22651 (*CONST_DOUBLE_REAL_VALUE (elt
), GET_MODE_INNER (mode
),
22652 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22658 return default_assemble_integer (x
, size
, aligned_p
);
22662 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22666 if (!TARGET_AAPCS_BASED
)
22669 default_named_section_asm_out_constructor
22670 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22674 /* Put these in the .init_array section, using a special relocation. */
22675 if (priority
!= DEFAULT_INIT_PRIORITY
)
22678 sprintf (buf
, "%s.%.5u",
22679 is_ctor
? ".init_array" : ".fini_array",
22681 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
22688 switch_to_section (s
);
22689 assemble_align (POINTER_SIZE
);
22690 fputs ("\t.word\t", asm_out_file
);
22691 output_addr_const (asm_out_file
, symbol
);
22692 fputs ("(target1)\n", asm_out_file
);
22695 /* Add a function to the list of static constructors. */
22698 arm_elf_asm_constructor (rtx symbol
, int priority
)
22700 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22703 /* Add a function to the list of static destructors. */
22706 arm_elf_asm_destructor (rtx symbol
, int priority
)
22708 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22711 /* A finite state machine takes care of noticing whether or not instructions
22712 can be conditionally executed, and thus decrease execution time and code
22713 size by deleting branch instructions. The fsm is controlled by
22714 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22716 /* The state of the fsm controlling condition codes are:
22717 0: normal, do nothing special
22718 1: make ASM_OUTPUT_OPCODE not output this instruction
22719 2: make ASM_OUTPUT_OPCODE not output this instruction
22720 3: make instructions conditional
22721 4: make instructions conditional
22723 State transitions (state->state by whom under condition):
22724 0 -> 1 final_prescan_insn if the `target' is a label
22725 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22726 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22727 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22728 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22729 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22730 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22731 (the target insn is arm_target_insn).
22733 If the jump clobbers the conditions then we use states 2 and 4.
22735 A similar thing can be done with conditional return insns.
22737 XXX In case the `target' is an unconditional branch, this conditionalising
22738 of the instructions always reduces code size, but not always execution
22739 time. But then, I want to reduce the code size to somewhere near what
22740 /bin/cc produces. */
22742 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22743 instructions. When a COND_EXEC instruction is seen the subsequent
22744 instructions are scanned so that multiple conditional instructions can be
22745 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22746 specify the length and true/false mask for the IT block. These will be
22747 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22749 /* Returns the index of the ARM condition code string in
22750 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22751 COMPARISON should be an rtx like `(eq (...) (...))'. */
22754 maybe_get_arm_condition_code (rtx comparison
)
22756 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22757 enum arm_cond_code code
;
22758 enum rtx_code comp_code
= GET_CODE (comparison
);
22760 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22761 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22762 XEXP (comparison
, 1));
22766 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22767 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22768 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22769 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22770 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22771 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22772 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22773 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22774 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22775 case CC_DLTUmode
: code
= ARM_CC
;
22778 if (comp_code
== EQ
)
22779 return ARM_INVERSE_CONDITION_CODE (code
);
22780 if (comp_code
== NE
)
22787 case NE
: return ARM_NE
;
22788 case EQ
: return ARM_EQ
;
22789 case GE
: return ARM_PL
;
22790 case LT
: return ARM_MI
;
22791 default: return ARM_NV
;
22797 case NE
: return ARM_NE
;
22798 case EQ
: return ARM_EQ
;
22799 default: return ARM_NV
;
22805 case NE
: return ARM_MI
;
22806 case EQ
: return ARM_PL
;
22807 default: return ARM_NV
;
22812 /* We can handle all cases except UNEQ and LTGT. */
22815 case GE
: return ARM_GE
;
22816 case GT
: return ARM_GT
;
22817 case LE
: return ARM_LS
;
22818 case LT
: return ARM_MI
;
22819 case NE
: return ARM_NE
;
22820 case EQ
: return ARM_EQ
;
22821 case ORDERED
: return ARM_VC
;
22822 case UNORDERED
: return ARM_VS
;
22823 case UNLT
: return ARM_LT
;
22824 case UNLE
: return ARM_LE
;
22825 case UNGT
: return ARM_HI
;
22826 case UNGE
: return ARM_PL
;
22827 /* UNEQ and LTGT do not have a representation. */
22828 case UNEQ
: /* Fall through. */
22829 case LTGT
: /* Fall through. */
22830 default: return ARM_NV
;
22836 case NE
: return ARM_NE
;
22837 case EQ
: return ARM_EQ
;
22838 case GE
: return ARM_LE
;
22839 case GT
: return ARM_LT
;
22840 case LE
: return ARM_GE
;
22841 case LT
: return ARM_GT
;
22842 case GEU
: return ARM_LS
;
22843 case GTU
: return ARM_CC
;
22844 case LEU
: return ARM_CS
;
22845 case LTU
: return ARM_HI
;
22846 default: return ARM_NV
;
22852 case LTU
: return ARM_CS
;
22853 case GEU
: return ARM_CC
;
22854 default: return ARM_NV
;
22860 case NE
: return ARM_NE
;
22861 case EQ
: return ARM_EQ
;
22862 case GEU
: return ARM_CS
;
22863 case GTU
: return ARM_HI
;
22864 case LEU
: return ARM_LS
;
22865 case LTU
: return ARM_CC
;
22866 default: return ARM_NV
;
22872 case GE
: return ARM_GE
;
22873 case LT
: return ARM_LT
;
22874 case GEU
: return ARM_CS
;
22875 case LTU
: return ARM_CC
;
22876 default: return ARM_NV
;
22882 case NE
: return ARM_NE
;
22883 case EQ
: return ARM_EQ
;
22884 case GE
: return ARM_GE
;
22885 case GT
: return ARM_GT
;
22886 case LE
: return ARM_LE
;
22887 case LT
: return ARM_LT
;
22888 case GEU
: return ARM_CS
;
22889 case GTU
: return ARM_HI
;
22890 case LEU
: return ARM_LS
;
22891 case LTU
: return ARM_CC
;
22892 default: return ARM_NV
;
22895 default: gcc_unreachable ();
22899 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22900 static enum arm_cond_code
22901 get_arm_condition_code (rtx comparison
)
22903 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22904 gcc_assert (code
!= ARM_NV
);
22908 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22911 thumb2_final_prescan_insn (rtx_insn
*insn
)
22913 rtx_insn
*first_insn
= insn
;
22914 rtx body
= PATTERN (insn
);
22916 enum arm_cond_code code
;
22921 /* max_insns_skipped in the tune was already taken into account in the
22922 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22923 just emit the IT blocks as we can. It does not make sense to split
22925 max
= MAX_INSN_PER_IT_BLOCK
;
22927 /* Remove the previous insn from the count of insns to be output. */
22928 if (arm_condexec_count
)
22929 arm_condexec_count
--;
22931 /* Nothing to do if we are already inside a conditional block. */
22932 if (arm_condexec_count
)
22935 if (GET_CODE (body
) != COND_EXEC
)
22938 /* Conditional jumps are implemented directly. */
22942 predicate
= COND_EXEC_TEST (body
);
22943 arm_current_cc
= get_arm_condition_code (predicate
);
22945 n
= get_attr_ce_count (insn
);
22946 arm_condexec_count
= 1;
22947 arm_condexec_mask
= (1 << n
) - 1;
22948 arm_condexec_masklen
= n
;
22949 /* See if subsequent instructions can be combined into the same block. */
22952 insn
= next_nonnote_insn (insn
);
22954 /* Jumping into the middle of an IT block is illegal, so a label or
22955 barrier terminates the block. */
22956 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22959 body
= PATTERN (insn
);
22960 /* USE and CLOBBER aren't really insns, so just skip them. */
22961 if (GET_CODE (body
) == USE
22962 || GET_CODE (body
) == CLOBBER
)
22965 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22966 if (GET_CODE (body
) != COND_EXEC
)
22968 /* Maximum number of conditionally executed instructions in a block. */
22969 n
= get_attr_ce_count (insn
);
22970 if (arm_condexec_masklen
+ n
> max
)
22973 predicate
= COND_EXEC_TEST (body
);
22974 code
= get_arm_condition_code (predicate
);
22975 mask
= (1 << n
) - 1;
22976 if (arm_current_cc
== code
)
22977 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22978 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22981 arm_condexec_count
++;
22982 arm_condexec_masklen
+= n
;
22984 /* A jump must be the last instruction in a conditional block. */
22988 /* Restore recog_data (getting the attributes of other insns can
22989 destroy this array, but final.c assumes that it remains intact
22990 across this call). */
22991 extract_constrain_insn_cached (first_insn
);
22995 arm_final_prescan_insn (rtx_insn
*insn
)
22997 /* BODY will hold the body of INSN. */
22998 rtx body
= PATTERN (insn
);
23000 /* This will be 1 if trying to repeat the trick, and things need to be
23001 reversed if it appears to fail. */
23004 /* If we start with a return insn, we only succeed if we find another one. */
23005 int seeking_return
= 0;
23006 enum rtx_code return_code
= UNKNOWN
;
23008 /* START_INSN will hold the insn from where we start looking. This is the
23009 first insn after the following code_label if REVERSE is true. */
23010 rtx_insn
*start_insn
= insn
;
23012 /* If in state 4, check if the target branch is reached, in order to
23013 change back to state 0. */
23014 if (arm_ccfsm_state
== 4)
23016 if (insn
== arm_target_insn
)
23018 arm_target_insn
= NULL
;
23019 arm_ccfsm_state
= 0;
23024 /* If in state 3, it is possible to repeat the trick, if this insn is an
23025 unconditional branch to a label, and immediately following this branch
23026 is the previous target label which is only used once, and the label this
23027 branch jumps to is not too far off. */
23028 if (arm_ccfsm_state
== 3)
23030 if (simplejump_p (insn
))
23032 start_insn
= next_nonnote_insn (start_insn
);
23033 if (BARRIER_P (start_insn
))
23035 /* XXX Isn't this always a barrier? */
23036 start_insn
= next_nonnote_insn (start_insn
);
23038 if (LABEL_P (start_insn
)
23039 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23040 && LABEL_NUSES (start_insn
) == 1)
23045 else if (ANY_RETURN_P (body
))
23047 start_insn
= next_nonnote_insn (start_insn
);
23048 if (BARRIER_P (start_insn
))
23049 start_insn
= next_nonnote_insn (start_insn
);
23050 if (LABEL_P (start_insn
)
23051 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23052 && LABEL_NUSES (start_insn
) == 1)
23055 seeking_return
= 1;
23056 return_code
= GET_CODE (body
);
23065 gcc_assert (!arm_ccfsm_state
|| reverse
);
23066 if (!JUMP_P (insn
))
23069 /* This jump might be paralleled with a clobber of the condition codes
23070 the jump should always come first */
23071 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23072 body
= XVECEXP (body
, 0, 0);
23075 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23076 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23079 int fail
= FALSE
, succeed
= FALSE
;
23080 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23081 int then_not_else
= TRUE
;
23082 rtx_insn
*this_insn
= start_insn
;
23085 /* Register the insn jumped to. */
23088 if (!seeking_return
)
23089 label
= XEXP (SET_SRC (body
), 0);
23091 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23092 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23093 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23095 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23096 then_not_else
= FALSE
;
23098 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23100 seeking_return
= 1;
23101 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23103 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23105 seeking_return
= 1;
23106 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23107 then_not_else
= FALSE
;
23110 gcc_unreachable ();
23112 /* See how many insns this branch skips, and what kind of insns. If all
23113 insns are okay, and the label or unconditional branch to the same
23114 label is not too far away, succeed. */
23115 for (insns_skipped
= 0;
23116 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23120 this_insn
= next_nonnote_insn (this_insn
);
23124 switch (GET_CODE (this_insn
))
23127 /* Succeed if it is the target label, otherwise fail since
23128 control falls in from somewhere else. */
23129 if (this_insn
== label
)
23131 arm_ccfsm_state
= 1;
23139 /* Succeed if the following insn is the target label.
23141 If return insns are used then the last insn in a function
23142 will be a barrier. */
23143 this_insn
= next_nonnote_insn (this_insn
);
23144 if (this_insn
&& this_insn
== label
)
23146 arm_ccfsm_state
= 1;
23154 /* The AAPCS says that conditional calls should not be
23155 used since they make interworking inefficient (the
23156 linker can't transform BL<cond> into BLX). That's
23157 only a problem if the machine has BLX. */
23164 /* Succeed if the following insn is the target label, or
23165 if the following two insns are a barrier and the
23167 this_insn
= next_nonnote_insn (this_insn
);
23168 if (this_insn
&& BARRIER_P (this_insn
))
23169 this_insn
= next_nonnote_insn (this_insn
);
23171 if (this_insn
&& this_insn
== label
23172 && insns_skipped
< max_insns_skipped
)
23174 arm_ccfsm_state
= 1;
23182 /* If this is an unconditional branch to the same label, succeed.
23183 If it is to another label, do nothing. If it is conditional,
23185 /* XXX Probably, the tests for SET and the PC are
23188 scanbody
= PATTERN (this_insn
);
23189 if (GET_CODE (scanbody
) == SET
23190 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23192 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23193 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23195 arm_ccfsm_state
= 2;
23198 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23201 /* Fail if a conditional return is undesirable (e.g. on a
23202 StrongARM), but still allow this if optimizing for size. */
23203 else if (GET_CODE (scanbody
) == return_code
23204 && !use_return_insn (TRUE
, NULL
)
23207 else if (GET_CODE (scanbody
) == return_code
)
23209 arm_ccfsm_state
= 2;
23212 else if (GET_CODE (scanbody
) == PARALLEL
)
23214 switch (get_attr_conds (this_insn
))
23224 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23229 /* Instructions using or affecting the condition codes make it
23231 scanbody
= PATTERN (this_insn
);
23232 if (!(GET_CODE (scanbody
) == SET
23233 || GET_CODE (scanbody
) == PARALLEL
)
23234 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23244 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23245 arm_target_label
= CODE_LABEL_NUMBER (label
);
23248 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23250 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23252 this_insn
= next_nonnote_insn (this_insn
);
23253 gcc_assert (!this_insn
23254 || (!BARRIER_P (this_insn
)
23255 && !LABEL_P (this_insn
)));
23259 /* Oh, dear! we ran off the end.. give up. */
23260 extract_constrain_insn_cached (insn
);
23261 arm_ccfsm_state
= 0;
23262 arm_target_insn
= NULL
;
23265 arm_target_insn
= this_insn
;
23268 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23271 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23273 if (reverse
|| then_not_else
)
23274 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23277 /* Restore recog_data (getting the attributes of other insns can
23278 destroy this array, but final.c assumes that it remains intact
23279 across this call. */
23280 extract_constrain_insn_cached (insn
);
23284 /* Output IT instructions. */
23286 thumb2_asm_output_opcode (FILE * stream
)
23291 if (arm_condexec_mask
)
23293 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23294 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23296 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23297 arm_condition_codes
[arm_current_cc
]);
23298 arm_condexec_mask
= 0;
23302 /* Returns true if REGNO is a valid register
23303 for holding a quantity of type MODE. */
23305 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23307 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23308 return (regno
== CC_REGNUM
23309 || (TARGET_HARD_FLOAT
&& TARGET_VFP
23310 && regno
== VFPCC_REGNUM
));
23312 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23316 /* For the Thumb we only allow values bigger than SImode in
23317 registers 0 - 6, so that there is always a second low
23318 register available to hold the upper part of the value.
23319 We probably we ought to ensure that the register is the
23320 start of an even numbered register pair. */
23321 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23323 if (TARGET_HARD_FLOAT
&& TARGET_VFP
23324 && IS_VFP_REGNUM (regno
))
23326 if (mode
== SFmode
|| mode
== SImode
)
23327 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23329 if (mode
== DFmode
)
23330 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23332 /* VFP registers can hold HFmode values, but there is no point in
23333 putting them there unless we have hardware conversion insns. */
23334 if (mode
== HFmode
)
23335 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
23338 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23339 || (VALID_NEON_QREG_MODE (mode
)
23340 && NEON_REGNO_OK_FOR_QUAD (regno
))
23341 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23342 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23343 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23344 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23345 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23350 if (TARGET_REALLY_IWMMXT
)
23352 if (IS_IWMMXT_GR_REGNUM (regno
))
23353 return mode
== SImode
;
23355 if (IS_IWMMXT_REGNUM (regno
))
23356 return VALID_IWMMXT_REG_MODE (mode
);
23359 /* We allow almost any value to be stored in the general registers.
23360 Restrict doubleword quantities to even register pairs in ARM state
23361 so that we can use ldrd. Do not allow very large Neon structure
23362 opaque modes in general registers; they would use too many. */
23363 if (regno
<= LAST_ARM_REGNUM
)
23365 if (ARM_NUM_REGS (mode
) > 4)
23371 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23374 if (regno
== FRAME_POINTER_REGNUM
23375 || regno
== ARG_POINTER_REGNUM
)
23376 /* We only allow integers in the fake hard registers. */
23377 return GET_MODE_CLASS (mode
) == MODE_INT
;
23382 /* Implement MODES_TIEABLE_P. */
23385 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23387 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23390 /* We specifically want to allow elements of "structure" modes to
23391 be tieable to the structure. This more general condition allows
23392 other rarer situations too. */
23394 && (VALID_NEON_DREG_MODE (mode1
)
23395 || VALID_NEON_QREG_MODE (mode1
)
23396 || VALID_NEON_STRUCT_MODE (mode1
))
23397 && (VALID_NEON_DREG_MODE (mode2
)
23398 || VALID_NEON_QREG_MODE (mode2
)
23399 || VALID_NEON_STRUCT_MODE (mode2
)))
23405 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23406 not used in arm mode. */
23409 arm_regno_class (int regno
)
23411 if (regno
== PC_REGNUM
)
23416 if (regno
== STACK_POINTER_REGNUM
)
23418 if (regno
== CC_REGNUM
)
23425 if (TARGET_THUMB2
&& regno
< 8)
23428 if ( regno
<= LAST_ARM_REGNUM
23429 || regno
== FRAME_POINTER_REGNUM
23430 || regno
== ARG_POINTER_REGNUM
)
23431 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23433 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23434 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23436 if (IS_VFP_REGNUM (regno
))
23438 if (regno
<= D7_VFP_REGNUM
)
23439 return VFP_D0_D7_REGS
;
23440 else if (regno
<= LAST_LO_VFP_REGNUM
)
23441 return VFP_LO_REGS
;
23443 return VFP_HI_REGS
;
23446 if (IS_IWMMXT_REGNUM (regno
))
23447 return IWMMXT_REGS
;
23449 if (IS_IWMMXT_GR_REGNUM (regno
))
23450 return IWMMXT_GR_REGS
;
23455 /* Handle a special case when computing the offset
23456 of an argument from the frame pointer. */
23458 arm_debugger_arg_offset (int value
, rtx addr
)
23462 /* We are only interested if dbxout_parms() failed to compute the offset. */
23466 /* We can only cope with the case where the address is held in a register. */
23470 /* If we are using the frame pointer to point at the argument, then
23471 an offset of 0 is correct. */
23472 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23475 /* If we are using the stack pointer to point at the
23476 argument, then an offset of 0 is correct. */
23477 /* ??? Check this is consistent with thumb2 frame layout. */
23478 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23479 && REGNO (addr
) == SP_REGNUM
)
23482 /* Oh dear. The argument is pointed to by a register rather
23483 than being held in a register, or being stored at a known
23484 offset from the frame pointer. Since GDB only understands
23485 those two kinds of argument we must translate the address
23486 held in the register into an offset from the frame pointer.
23487 We do this by searching through the insns for the function
23488 looking to see where this register gets its value. If the
23489 register is initialized from the frame pointer plus an offset
23490 then we are in luck and we can continue, otherwise we give up.
23492 This code is exercised by producing debugging information
23493 for a function with arguments like this:
23495 double func (double a, double b, int c, double d) {return d;}
23497 Without this code the stab for parameter 'd' will be set to
23498 an offset of 0 from the frame pointer, rather than 8. */
23500 /* The if() statement says:
23502 If the insn is a normal instruction
23503 and if the insn is setting the value in a register
23504 and if the register being set is the register holding the address of the argument
23505 and if the address is computing by an addition
23506 that involves adding to a register
23507 which is the frame pointer
23512 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23514 if ( NONJUMP_INSN_P (insn
)
23515 && GET_CODE (PATTERN (insn
)) == SET
23516 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23517 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23518 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23519 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23520 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23523 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23532 warning (0, "unable to compute real location of stacked parameter");
23533 value
= 8; /* XXX magic hack */
23539 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23541 static const char *
23542 arm_invalid_parameter_type (const_tree t
)
23544 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23545 return N_("function parameters cannot have __fp16 type");
23549 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23551 static const char *
23552 arm_invalid_return_type (const_tree t
)
23554 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23555 return N_("functions cannot return __fp16 type");
23559 /* Implement TARGET_PROMOTED_TYPE. */
23562 arm_promoted_type (const_tree t
)
23564 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
23565 return float_type_node
;
23569 /* Implement TARGET_CONVERT_TO_TYPE.
23570 Specifically, this hook implements the peculiarity of the ARM
23571 half-precision floating-point C semantics that requires conversions between
23572 __fp16 to or from double to do an intermediate conversion to float. */
23575 arm_convert_to_type (tree type
, tree expr
)
23577 tree fromtype
= TREE_TYPE (expr
);
23578 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
23580 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
23581 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
23582 return convert (type
, convert (float_type_node
, expr
));
23586 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23587 This simply adds HFmode as a supported mode; even though we don't
23588 implement arithmetic on this type directly, it's supported by
23589 optabs conversions, much the way the double-word arithmetic is
23590 special-cased in the default hook. */
23593 arm_scalar_mode_supported_p (machine_mode mode
)
23595 if (mode
== HFmode
)
23596 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23597 else if (ALL_FIXED_POINT_MODE_P (mode
))
23600 return default_scalar_mode_supported_p (mode
);
23603 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23605 neon_reinterpret (rtx dest
, rtx src
)
23607 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
23610 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23611 not to early-clobber SRC registers in the process.
23613 We assume that the operands described by SRC and DEST represent a
23614 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23615 number of components into which the copy has been decomposed. */
23617 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23621 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23622 || REGNO (operands
[0]) < REGNO (operands
[1]))
23624 for (i
= 0; i
< count
; i
++)
23626 operands
[2 * i
] = dest
[i
];
23627 operands
[2 * i
+ 1] = src
[i
];
23632 for (i
= 0; i
< count
; i
++)
23634 operands
[2 * i
] = dest
[count
- i
- 1];
23635 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23640 /* Split operands into moves from op[1] + op[2] into op[0]. */
23643 neon_split_vcombine (rtx operands
[3])
23645 unsigned int dest
= REGNO (operands
[0]);
23646 unsigned int src1
= REGNO (operands
[1]);
23647 unsigned int src2
= REGNO (operands
[2]);
23648 machine_mode halfmode
= GET_MODE (operands
[1]);
23649 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
23650 rtx destlo
, desthi
;
23652 if (src1
== dest
&& src2
== dest
+ halfregs
)
23654 /* No-op move. Can't split to nothing; emit something. */
23655 emit_note (NOTE_INSN_DELETED
);
23659 /* Preserve register attributes for variable tracking. */
23660 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23661 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23662 GET_MODE_SIZE (halfmode
));
23664 /* Special case of reversed high/low parts. Use VSWP. */
23665 if (src2
== dest
&& src1
== dest
+ halfregs
)
23667 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23668 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23669 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23673 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23675 /* Try to avoid unnecessary moves if part of the result
23676 is in the right place already. */
23678 emit_move_insn (destlo
, operands
[1]);
23679 if (src2
!= dest
+ halfregs
)
23680 emit_move_insn (desthi
, operands
[2]);
23684 if (src2
!= dest
+ halfregs
)
23685 emit_move_insn (desthi
, operands
[2]);
23687 emit_move_insn (destlo
, operands
[1]);
23691 /* Return the number (counting from 0) of
23692 the least significant set bit in MASK. */
23695 number_of_first_bit_set (unsigned mask
)
23697 return ctz_hwi (mask
);
23700 /* Like emit_multi_reg_push, but allowing for a different set of
23701 registers to be described as saved. MASK is the set of registers
23702 to be saved; REAL_REGS is the set of registers to be described as
23703 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23706 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23708 unsigned long regno
;
23709 rtx par
[10], tmp
, reg
;
23713 /* Build the parallel of the registers actually being stored. */
23714 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23716 regno
= ctz_hwi (mask
);
23717 reg
= gen_rtx_REG (SImode
, regno
);
23720 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23722 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23727 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23728 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23729 tmp
= gen_frame_mem (BLKmode
, tmp
);
23730 tmp
= gen_rtx_SET (tmp
, par
[0]);
23733 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23734 insn
= emit_insn (tmp
);
23736 /* Always build the stack adjustment note for unwind info. */
23737 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23738 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23741 /* Build the parallel of the registers recorded as saved for unwind. */
23742 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23744 regno
= ctz_hwi (real_regs
);
23745 reg
= gen_rtx_REG (SImode
, regno
);
23747 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23748 tmp
= gen_frame_mem (SImode
, tmp
);
23749 tmp
= gen_rtx_SET (tmp
, reg
);
23750 RTX_FRAME_RELATED_P (tmp
) = 1;
23758 RTX_FRAME_RELATED_P (par
[0]) = 1;
23759 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23762 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23767 /* Emit code to push or pop registers to or from the stack. F is the
23768 assembly file. MASK is the registers to pop. */
23770 thumb_pop (FILE *f
, unsigned long mask
)
23773 int lo_mask
= mask
& 0xFF;
23774 int pushed_words
= 0;
23778 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23780 /* Special case. Do not generate a POP PC statement here, do it in
23782 thumb_exit (f
, -1);
23786 fprintf (f
, "\tpop\t{");
23788 /* Look at the low registers first. */
23789 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23793 asm_fprintf (f
, "%r", regno
);
23795 if ((lo_mask
& ~1) != 0)
23802 if (mask
& (1 << PC_REGNUM
))
23804 /* Catch popping the PC. */
23805 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
23806 || crtl
->calls_eh_return
)
23808 /* The PC is never poped directly, instead
23809 it is popped into r3 and then BX is used. */
23810 fprintf (f
, "}\n");
23812 thumb_exit (f
, -1);
23821 asm_fprintf (f
, "%r", PC_REGNUM
);
23825 fprintf (f
, "}\n");
23828 /* Generate code to return from a thumb function.
23829 If 'reg_containing_return_addr' is -1, then the return address is
23830 actually on the stack, at the stack pointer. */
23832 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23834 unsigned regs_available_for_popping
;
23835 unsigned regs_to_pop
;
23837 unsigned available
;
23841 int restore_a4
= FALSE
;
23843 /* Compute the registers we need to pop. */
23847 if (reg_containing_return_addr
== -1)
23849 regs_to_pop
|= 1 << LR_REGNUM
;
23853 if (TARGET_BACKTRACE
)
23855 /* Restore the (ARM) frame pointer and stack pointer. */
23856 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23860 /* If there is nothing to pop then just emit the BX instruction and
23862 if (pops_needed
== 0)
23864 if (crtl
->calls_eh_return
)
23865 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23867 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23870 /* Otherwise if we are not supporting interworking and we have not created
23871 a backtrace structure and the function was not entered in ARM mode then
23872 just pop the return address straight into the PC. */
23873 else if (!TARGET_INTERWORK
23874 && !TARGET_BACKTRACE
23875 && !is_called_in_ARM_mode (current_function_decl
)
23876 && !crtl
->calls_eh_return
)
23878 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23882 /* Find out how many of the (return) argument registers we can corrupt. */
23883 regs_available_for_popping
= 0;
23885 /* If returning via __builtin_eh_return, the bottom three registers
23886 all contain information needed for the return. */
23887 if (crtl
->calls_eh_return
)
23891 /* If we can deduce the registers used from the function's
23892 return value. This is more reliable that examining
23893 df_regs_ever_live_p () because that will be set if the register is
23894 ever used in the function, not just if the register is used
23895 to hold a return value. */
23897 if (crtl
->return_rtx
!= 0)
23898 mode
= GET_MODE (crtl
->return_rtx
);
23900 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23902 size
= GET_MODE_SIZE (mode
);
23906 /* In a void function we can use any argument register.
23907 In a function that returns a structure on the stack
23908 we can use the second and third argument registers. */
23909 if (mode
== VOIDmode
)
23910 regs_available_for_popping
=
23911 (1 << ARG_REGISTER (1))
23912 | (1 << ARG_REGISTER (2))
23913 | (1 << ARG_REGISTER (3));
23915 regs_available_for_popping
=
23916 (1 << ARG_REGISTER (2))
23917 | (1 << ARG_REGISTER (3));
23919 else if (size
<= 4)
23920 regs_available_for_popping
=
23921 (1 << ARG_REGISTER (2))
23922 | (1 << ARG_REGISTER (3));
23923 else if (size
<= 8)
23924 regs_available_for_popping
=
23925 (1 << ARG_REGISTER (3));
23928 /* Match registers to be popped with registers into which we pop them. */
23929 for (available
= regs_available_for_popping
,
23930 required
= regs_to_pop
;
23931 required
!= 0 && available
!= 0;
23932 available
&= ~(available
& - available
),
23933 required
&= ~(required
& - required
))
23936 /* If we have any popping registers left over, remove them. */
23938 regs_available_for_popping
&= ~available
;
23940 /* Otherwise if we need another popping register we can use
23941 the fourth argument register. */
23942 else if (pops_needed
)
23944 /* If we have not found any free argument registers and
23945 reg a4 contains the return address, we must move it. */
23946 if (regs_available_for_popping
== 0
23947 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
23949 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23950 reg_containing_return_addr
= LR_REGNUM
;
23952 else if (size
> 12)
23954 /* Register a4 is being used to hold part of the return value,
23955 but we have dire need of a free, low register. */
23958 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
23961 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
23963 /* The fourth argument register is available. */
23964 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
23970 /* Pop as many registers as we can. */
23971 thumb_pop (f
, regs_available_for_popping
);
23973 /* Process the registers we popped. */
23974 if (reg_containing_return_addr
== -1)
23976 /* The return address was popped into the lowest numbered register. */
23977 regs_to_pop
&= ~(1 << LR_REGNUM
);
23979 reg_containing_return_addr
=
23980 number_of_first_bit_set (regs_available_for_popping
);
23982 /* Remove this register for the mask of available registers, so that
23983 the return address will not be corrupted by further pops. */
23984 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
23987 /* If we popped other registers then handle them here. */
23988 if (regs_available_for_popping
)
23992 /* Work out which register currently contains the frame pointer. */
23993 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
23995 /* Move it into the correct place. */
23996 asm_fprintf (f
, "\tmov\t%r, %r\n",
23997 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
23999 /* (Temporarily) remove it from the mask of popped registers. */
24000 regs_available_for_popping
&= ~(1 << frame_pointer
);
24001 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24003 if (regs_available_for_popping
)
24007 /* We popped the stack pointer as well,
24008 find the register that contains it. */
24009 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24011 /* Move it into the stack register. */
24012 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24014 /* At this point we have popped all necessary registers, so
24015 do not worry about restoring regs_available_for_popping
24016 to its correct value:
24018 assert (pops_needed == 0)
24019 assert (regs_available_for_popping == (1 << frame_pointer))
24020 assert (regs_to_pop == (1 << STACK_POINTER)) */
24024 /* Since we have just move the popped value into the frame
24025 pointer, the popping register is available for reuse, and
24026 we know that we still have the stack pointer left to pop. */
24027 regs_available_for_popping
|= (1 << frame_pointer
);
24031 /* If we still have registers left on the stack, but we no longer have
24032 any registers into which we can pop them, then we must move the return
24033 address into the link register and make available the register that
24035 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24037 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24039 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24040 reg_containing_return_addr
);
24042 reg_containing_return_addr
= LR_REGNUM
;
24045 /* If we have registers left on the stack then pop some more.
24046 We know that at most we will want to pop FP and SP. */
24047 if (pops_needed
> 0)
24052 thumb_pop (f
, regs_available_for_popping
);
24054 /* We have popped either FP or SP.
24055 Move whichever one it is into the correct register. */
24056 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24057 move_to
= number_of_first_bit_set (regs_to_pop
);
24059 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24061 regs_to_pop
&= ~(1 << move_to
);
24066 /* If we still have not popped everything then we must have only
24067 had one register available to us and we are now popping the SP. */
24068 if (pops_needed
> 0)
24072 thumb_pop (f
, regs_available_for_popping
);
24074 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24076 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24078 assert (regs_to_pop == (1 << STACK_POINTER))
24079 assert (pops_needed == 1)
24083 /* If necessary restore the a4 register. */
24086 if (reg_containing_return_addr
!= LR_REGNUM
)
24088 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24089 reg_containing_return_addr
= LR_REGNUM
;
24092 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24095 if (crtl
->calls_eh_return
)
24096 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24098 /* Return to caller. */
24099 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24102 /* Scan INSN just before assembler is output for it.
24103 For Thumb-1, we track the status of the condition codes; this
24104 information is used in the cbranchsi4_insn pattern. */
24106 thumb1_final_prescan_insn (rtx_insn
*insn
)
24108 if (flag_print_asm_name
)
24109 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24110 INSN_ADDRESSES (INSN_UID (insn
)));
24111 /* Don't overwrite the previous setter when we get to a cbranch. */
24112 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24114 enum attr_conds conds
;
24116 if (cfun
->machine
->thumb1_cc_insn
)
24118 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24119 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24122 conds
= get_attr_conds (insn
);
24123 if (conds
== CONDS_SET
)
24125 rtx set
= single_set (insn
);
24126 cfun
->machine
->thumb1_cc_insn
= insn
;
24127 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24128 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24129 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24130 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24132 rtx src1
= XEXP (SET_SRC (set
), 1);
24133 if (src1
== const0_rtx
)
24134 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24136 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24138 /* Record the src register operand instead of dest because
24139 cprop_hardreg pass propagates src. */
24140 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24143 else if (conds
!= CONDS_NOCOND
)
24144 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24147 /* Check if unexpected far jump is used. */
24148 if (cfun
->machine
->lr_save_eliminated
24149 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24150 internal_error("Unexpected thumb1 far jump");
24154 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24156 unsigned HOST_WIDE_INT mask
= 0xff;
24159 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24160 if (val
== 0) /* XXX */
24163 for (i
= 0; i
< 25; i
++)
24164 if ((val
& (mask
<< i
)) == val
)
24170 /* Returns nonzero if the current function contains,
24171 or might contain a far jump. */
24173 thumb_far_jump_used_p (void)
24176 bool far_jump
= false;
24177 unsigned int func_size
= 0;
24179 /* This test is only important for leaf functions. */
24180 /* assert (!leaf_function_p ()); */
24182 /* If we have already decided that far jumps may be used,
24183 do not bother checking again, and always return true even if
24184 it turns out that they are not being used. Once we have made
24185 the decision that far jumps are present (and that hence the link
24186 register will be pushed onto the stack) we cannot go back on it. */
24187 if (cfun
->machine
->far_jump_used
)
24190 /* If this function is not being called from the prologue/epilogue
24191 generation code then it must be being called from the
24192 INITIAL_ELIMINATION_OFFSET macro. */
24193 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24195 /* In this case we know that we are being asked about the elimination
24196 of the arg pointer register. If that register is not being used,
24197 then there are no arguments on the stack, and we do not have to
24198 worry that a far jump might force the prologue to push the link
24199 register, changing the stack offsets. In this case we can just
24200 return false, since the presence of far jumps in the function will
24201 not affect stack offsets.
24203 If the arg pointer is live (or if it was live, but has now been
24204 eliminated and so set to dead) then we do have to test to see if
24205 the function might contain a far jump. This test can lead to some
24206 false negatives, since before reload is completed, then length of
24207 branch instructions is not known, so gcc defaults to returning their
24208 longest length, which in turn sets the far jump attribute to true.
24210 A false negative will not result in bad code being generated, but it
24211 will result in a needless push and pop of the link register. We
24212 hope that this does not occur too often.
24214 If we need doubleword stack alignment this could affect the other
24215 elimination offsets so we can't risk getting it wrong. */
24216 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24217 cfun
->machine
->arg_pointer_live
= 1;
24218 else if (!cfun
->machine
->arg_pointer_live
)
24222 /* We should not change far_jump_used during or after reload, as there is
24223 no chance to change stack frame layout. */
24224 if (reload_in_progress
|| reload_completed
)
24227 /* Check to see if the function contains a branch
24228 insn with the far jump attribute set. */
24229 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24231 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24235 func_size
+= get_attr_length (insn
);
24238 /* Attribute far_jump will always be true for thumb1 before
24239 shorten_branch pass. So checking far_jump attribute before
24240 shorten_branch isn't much useful.
24242 Following heuristic tries to estimate more accurately if a far jump
24243 may finally be used. The heuristic is very conservative as there is
24244 no chance to roll-back the decision of not to use far jump.
24246 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24247 2-byte insn is associated with a 4 byte constant pool. Using
24248 function size 2048/3 as the threshold is conservative enough. */
24251 if ((func_size
* 3) >= 2048)
24253 /* Record the fact that we have decided that
24254 the function does use far jumps. */
24255 cfun
->machine
->far_jump_used
= 1;
24263 /* Return nonzero if FUNC must be entered in ARM mode. */
24265 is_called_in_ARM_mode (tree func
)
24267 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24269 /* Ignore the problem about functions whose address is taken. */
24270 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24274 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24280 /* Given the stack offsets and register mask in OFFSETS, decide how
24281 many additional registers to push instead of subtracting a constant
24282 from SP. For epilogues the principle is the same except we use pop.
24283 FOR_PROLOGUE indicates which we're generating. */
24285 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24287 HOST_WIDE_INT amount
;
24288 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24289 /* Extract a mask of the ones we can give to the Thumb's push/pop
24291 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24292 /* Then count how many other high registers will need to be pushed. */
24293 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24294 int n_free
, reg_base
, size
;
24296 if (!for_prologue
&& frame_pointer_needed
)
24297 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24299 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24301 /* If the stack frame size is 512 exactly, we can save one load
24302 instruction, which should make this a win even when optimizing
24304 if (!optimize_size
&& amount
!= 512)
24307 /* Can't do this if there are high registers to push. */
24308 if (high_regs_pushed
!= 0)
24311 /* Shouldn't do it in the prologue if no registers would normally
24312 be pushed at all. In the epilogue, also allow it if we'll have
24313 a pop insn for the PC. */
24316 || TARGET_BACKTRACE
24317 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24318 || TARGET_INTERWORK
24319 || crtl
->args
.pretend_args_size
!= 0))
24322 /* Don't do this if thumb_expand_prologue wants to emit instructions
24323 between the push and the stack frame allocation. */
24325 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24326 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24333 size
= arm_size_return_regs ();
24334 reg_base
= ARM_NUM_INTS (size
);
24335 live_regs_mask
>>= reg_base
;
24338 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24339 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24341 live_regs_mask
>>= 1;
24347 gcc_assert (amount
/ 4 * 4 == amount
);
24349 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24350 return (amount
- 508) / 4;
24351 if (amount
<= n_free
* 4)
24356 /* The bits which aren't usefully expanded as rtl. */
24358 thumb1_unexpanded_epilogue (void)
24360 arm_stack_offsets
*offsets
;
24362 unsigned long live_regs_mask
= 0;
24363 int high_regs_pushed
= 0;
24365 int had_to_push_lr
;
24368 if (cfun
->machine
->return_used_this_function
!= 0)
24371 if (IS_NAKED (arm_current_func_type ()))
24374 offsets
= arm_get_frame_offsets ();
24375 live_regs_mask
= offsets
->saved_regs_mask
;
24376 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24378 /* If we can deduce the registers used from the function's return value.
24379 This is more reliable that examining df_regs_ever_live_p () because that
24380 will be set if the register is ever used in the function, not just if
24381 the register is used to hold a return value. */
24382 size
= arm_size_return_regs ();
24384 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24387 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24388 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24391 /* The prolog may have pushed some high registers to use as
24392 work registers. e.g. the testsuite file:
24393 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24394 compiles to produce:
24395 push {r4, r5, r6, r7, lr}
24399 as part of the prolog. We have to undo that pushing here. */
24401 if (high_regs_pushed
)
24403 unsigned long mask
= live_regs_mask
& 0xff;
24406 /* The available low registers depend on the size of the value we are
24414 /* Oh dear! We have no low registers into which we can pop
24417 ("no low registers available for popping high registers");
24419 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24420 if (live_regs_mask
& (1 << next_hi_reg
))
24423 while (high_regs_pushed
)
24425 /* Find lo register(s) into which the high register(s) can
24427 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24429 if (mask
& (1 << regno
))
24430 high_regs_pushed
--;
24431 if (high_regs_pushed
== 0)
24435 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24437 /* Pop the values into the low register(s). */
24438 thumb_pop (asm_out_file
, mask
);
24440 /* Move the value(s) into the high registers. */
24441 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24443 if (mask
& (1 << regno
))
24445 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24448 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24449 if (live_regs_mask
& (1 << next_hi_reg
))
24454 live_regs_mask
&= ~0x0f00;
24457 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24458 live_regs_mask
&= 0xff;
24460 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24462 /* Pop the return address into the PC. */
24463 if (had_to_push_lr
)
24464 live_regs_mask
|= 1 << PC_REGNUM
;
24466 /* Either no argument registers were pushed or a backtrace
24467 structure was created which includes an adjusted stack
24468 pointer, so just pop everything. */
24469 if (live_regs_mask
)
24470 thumb_pop (asm_out_file
, live_regs_mask
);
24472 /* We have either just popped the return address into the
24473 PC or it is was kept in LR for the entire function.
24474 Note that thumb_pop has already called thumb_exit if the
24475 PC was in the list. */
24476 if (!had_to_push_lr
)
24477 thumb_exit (asm_out_file
, LR_REGNUM
);
24481 /* Pop everything but the return address. */
24482 if (live_regs_mask
)
24483 thumb_pop (asm_out_file
, live_regs_mask
);
24485 if (had_to_push_lr
)
24489 /* We have no free low regs, so save one. */
24490 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24494 /* Get the return address into a temporary register. */
24495 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24499 /* Move the return address to lr. */
24500 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24502 /* Restore the low register. */
24503 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24508 regno
= LAST_ARG_REGNUM
;
24513 /* Remove the argument registers that were pushed onto the stack. */
24514 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24515 SP_REGNUM
, SP_REGNUM
,
24516 crtl
->args
.pretend_args_size
);
24518 thumb_exit (asm_out_file
, regno
);
24524 /* Functions to save and restore machine-specific function data. */
24525 static struct machine_function
*
24526 arm_init_machine_status (void)
24528 struct machine_function
*machine
;
24529 machine
= ggc_cleared_alloc
<machine_function
> ();
24531 #if ARM_FT_UNKNOWN != 0
24532 machine
->func_type
= ARM_FT_UNKNOWN
;
24537 /* Return an RTX indicating where the return address to the
24538 calling function can be found. */
24540 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24545 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24548 /* Do anything needed before RTL is emitted for each function. */
24550 arm_init_expanders (void)
24552 /* Arrange to initialize and mark the machine per-function status. */
24553 init_machine_status
= arm_init_machine_status
;
24555 /* This is to stop the combine pass optimizing away the alignment
24556 adjustment of va_arg. */
24557 /* ??? It is claimed that this should not be necessary. */
24559 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
/* NOTE(review): mangled extraction -- the return type, braces and the early
   `return false;` body after the FUNCTION_DECL check are missing (line
   numbers jump 24567 -> 24570).  Visible behavior: fetch FUNC's
   target-specific options (falling back to target_option_default_node,
   presumably when the decl has none -- the guarding `if` is missing here),
   then report whether its Thumb/ARM mode differs from the current
   TARGET_THUMB setting.  */
24562 /* Check that FUNC is called with a different mode. */
24565 arm_change_mode_p (tree func
)
24567 if (TREE_CODE (func
) != FUNCTION_DECL
)
24570 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24573 callee_tree
= target_option_default_node
;
24575 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24576 int flags
= callee_opts
->x_target_flags
;
24578 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
/* NOTE(review): mangled extraction -- the two `switch (...)` heads, the
   nested `switch (to)` structure, braces and `case` bodies' surrounding
   control flow are missing (original line numbers jump 24591 -> 24595,
   24611 -> 24615, etc.).  What survives are the per-(FROM,TO) offset
   formulas: each returns the difference of two arm_stack_offsets fields.
   The first group appears to handle FROM == ARG_POINTER_REGNUM (offsets
   relative to saved_args), the second FROM == FRAME_POINTER_REGNUM
   (offsets relative to soft_frame) -- TODO confirm against upstream.  */
24581 /* Like arm_compute_initial_elimination offset. Simpler because there
24582 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24583 to point at the base of the local variables after static stack
24584 space for a function has been allocated. */
24587 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24589 arm_stack_offsets
*offsets
;
24591 offsets
= arm_get_frame_offsets ();
24595 case ARG_POINTER_REGNUM
:
24598 case STACK_POINTER_REGNUM
:
24599 return offsets
->outgoing_args
- offsets
->saved_args
;
24601 case FRAME_POINTER_REGNUM
:
24602 return offsets
->soft_frame
- offsets
->saved_args
;
24604 case ARM_HARD_FRAME_POINTER_REGNUM
:
24605 return offsets
->saved_regs
- offsets
->saved_args
;
24607 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24608 return offsets
->locals_base
- offsets
->saved_args
;
/* Unhandled TO value for this FROM group.  */
24611 gcc_unreachable ();
24615 case FRAME_POINTER_REGNUM
:
24618 case STACK_POINTER_REGNUM
:
24619 return offsets
->outgoing_args
- offsets
->soft_frame
;
24621 case ARM_HARD_FRAME_POINTER_REGNUM
:
24622 return offsets
->saved_regs
- offsets
->soft_frame
;
24624 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24625 return offsets
->locals_base
- offsets
->soft_frame
;
24628 gcc_unreachable ();
24633 gcc_unreachable ();
/* NOTE(review): mangled extraction of the Thumb-1 prologue expander --
   statements are split across lines and many original lines are dropped
   (braces, early `return`s, `if (amount)` guards, `else` arms), as shown
   by the jumps in the embedded original line numbers.  Recover from the
   upstream file before compiling.
   Visible structure: compute func_type and frame offsets; bail out for
   naked functions and reject Thumb interrupt routines; push the low
   registers (l_mask = live & 0x40ff) and count high registers needing a
   push; handle pretend args for anonymous-args functions; optionally emit
   an APCS backtrace structure under TARGET_BACKTRACE; shuffle high
   registers through pushable low registers; load the PIC register; set
   up the frame pointer; allocate the remaining stack (via an immediate
   add or a scratch-register load for large amounts); and finally emit
   scheduling blockages and record lr_save_eliminated.  */
24637 /* Generate the function's prologue. */
24640 thumb1_expand_prologue (void)
24644 HOST_WIDE_INT amount
;
24645 HOST_WIDE_INT size
;
24646 arm_stack_offsets
*offsets
;
24647 unsigned long func_type
;
24649 unsigned long live_regs_mask
;
24650 unsigned long l_mask
;
24651 unsigned high_regs_pushed
= 0;
24653 func_type
= arm_current_func_type ();
24655 /* Naked functions don't have prologues. */
24656 if (IS_NAKED (func_type
))
24659 if (IS_INTERRUPT (func_type
))
24661 error ("interrupt Service Routines cannot be coded in Thumb mode");
24665 if (is_called_in_ARM_mode (current_function_decl
))
24666 emit_insn (gen_prologue_thumb1_interwork ());
24668 offsets
= arm_get_frame_offsets ();
24669 live_regs_mask
= offsets
->saved_regs_mask
;
24671 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24672 l_mask
= live_regs_mask
& 0x40ff;
24673 /* Then count how many other high registers will need to be pushed. */
24674 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24676 if (crtl
->args
.pretend_args_size
)
24678 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24680 if (cfun
->machine
->uses_anonymous_args
)
24682 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24683 unsigned long mask
;
/* Build a contiguous mask of the highest NUM_PUSHES argument regs.  */
24685 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24686 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24688 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24692 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24693 stack_pointer_rtx
, x
));
24695 RTX_FRAME_RELATED_P (insn
) = 1;
24698 if (TARGET_BACKTRACE
)
24700 HOST_WIDE_INT offset
= 0;
24701 unsigned work_register
;
24702 rtx work_reg
, x
, arm_hfp_rtx
;
24704 /* We have been asked to create a stack backtrace structure.
24705 The code looks like this:
24709 0 sub SP, #16 Reserve space for 4 registers.
24710 2 push {R7} Push low registers.
24711 4 add R7, SP, #20 Get the stack pointer before the push.
24712 6 str R7, [SP, #8] Store the stack pointer
24713 (before reserving the space).
24714 8 mov R7, PC Get hold of the start of this code + 12.
24715 10 str R7, [SP, #16] Store it.
24716 12 mov R7, FP Get hold of the current frame pointer.
24717 14 str R7, [SP, #4] Store it.
24718 16 mov R7, LR Get hold of the current return address.
24719 18 str R7, [SP, #12] Store it.
24720 20 add R7, SP, #16 Point at the start of the
24721 backtrace structure.
24722 22 mov FP, R7 Put this value into the frame pointer. */
24724 work_register
= thumb_find_work_register (live_regs_mask
);
24725 work_reg
= gen_rtx_REG (SImode
, work_register
);
24726 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24728 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24729 stack_pointer_rtx
, GEN_INT (-16)));
24730 RTX_FRAME_RELATED_P (insn
) = 1;
24734 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24735 RTX_FRAME_RELATED_P (insn
) = 1;
24737 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
/* Store the pre-push stack pointer into the backtrace structure.  */
24740 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24741 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24743 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24744 x
= gen_frame_mem (SImode
, x
);
24745 emit_move_insn (x
, work_reg
);
24747 /* Make sure that the instruction fetching the PC is in the right place
24748 to calculate "start of backtrace creation code + 12". */
24749 /* ??? The stores using the common WORK_REG ought to be enough to
24750 prevent the scheduler from doing anything weird. Failing that
24751 we could always move all of the following into an UNSPEC_VOLATILE. */
24754 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24755 emit_move_insn (work_reg
, x
);
24757 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24758 x
= gen_frame_mem (SImode
, x
);
24759 emit_move_insn (x
, work_reg
);
24761 emit_move_insn (work_reg
, arm_hfp_rtx
);
24763 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24764 x
= gen_frame_mem (SImode
, x
);
24765 emit_move_insn (x
, work_reg
);
/* NOTE(review): this second FP-store sequence mirrors the one above --
   presumably the two arms of a dropped `if`/`else` (line numbers jump
   24765 -> 24769); confirm against upstream.  */
24769 emit_move_insn (work_reg
, arm_hfp_rtx
);
24771 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24772 x
= gen_frame_mem (SImode
, x
);
24773 emit_move_insn (x
, work_reg
);
24775 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24776 emit_move_insn (work_reg
, x
);
24778 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24779 x
= gen_frame_mem (SImode
, x
);
24780 emit_move_insn (x
, work_reg
);
/* Store LR (the return address) into the backtrace structure.  */
24783 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24784 emit_move_insn (work_reg
, x
);
24786 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24787 x
= gen_frame_mem (SImode
, x
);
24788 emit_move_insn (x
, work_reg
);
/* Point the frame pointer at the start of the backtrace structure.  */
24790 x
= GEN_INT (offset
+ 12);
24791 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24793 emit_move_insn (arm_hfp_rtx
, work_reg
);
24795 /* Optimization: If we are not pushing any low registers but we are going
24796 to push some high registers then delay our first push. This will just
24797 be a push of LR and we can combine it with the push of the first high
24799 else if ((l_mask
& 0xff) != 0
24800 || (high_regs_pushed
== 0 && l_mask
))
24802 unsigned long mask
= l_mask
;
24803 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24804 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24805 RTX_FRAME_RELATED_P (insn
) = 1;
24808 if (high_regs_pushed
)
24810 unsigned pushable_regs
;
24811 unsigned next_hi_reg
;
24812 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24813 : crtl
->args
.info
.nregs
;
24814 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
/* Find the highest live high register to relocate first.  */
24816 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24817 if (live_regs_mask
& (1 << next_hi_reg
))
24820 /* Here we need to mask out registers used for passing arguments
24821 even if they can be pushed. This is to avoid using them to stash the high
24822 registers. Such kind of stash may clobber the use of arguments. */
24823 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
24825 if (pushable_regs
== 0)
24826 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24828 while (high_regs_pushed
> 0)
24830 unsigned long real_regs_mask
= 0;
/* Copy high registers into free low registers, highest first.  */
24832 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
24834 if (pushable_regs
& (1 << regno
))
24836 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24837 gen_rtx_REG (SImode
, next_hi_reg
));
24839 high_regs_pushed
--;
24840 real_regs_mask
|= (1 << next_hi_reg
);
24842 if (high_regs_pushed
)
24844 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24846 if (live_regs_mask
& (1 << next_hi_reg
))
/* No more high regs: stop using low regs below REGNO.  */
24851 pushable_regs
&= ~((1 << regno
) - 1);
24857 /* If we had to find a work register and we have not yet
24858 saved the LR then add it to the list of regs to push. */
24859 if (l_mask
== (1 << LR_REGNUM
))
24861 pushable_regs
|= l_mask
;
24862 real_regs_mask
|= l_mask
;
24866 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
24867 RTX_FRAME_RELATED_P (insn
) = 1;
24871 /* Load the pic register before setting the frame pointer,
24872 so we can use r7 as a temporary work register. */
24873 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24874 arm_load_pic_register (live_regs_mask
);
24876 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24877 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24878 stack_pointer_rtx
);
24880 size
= offsets
->outgoing_args
- offsets
->saved_args
;
24881 if (flag_stack_usage_info
)
24882 current_function_static_stack_size
= size
;
24884 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24885 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
24886 sorry ("-fstack-check=specific for Thumb-1");
24888 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24889 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
/* NOTE(review): a guard (likely `if (amount)` and a size test selecting
   between the immediate-add and scratch-register paths) was dropped by
   the extraction (line numbers jump 24889 -> 24894).  */
24894 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24895 GEN_INT (- amount
)));
24896 RTX_FRAME_RELATED_P (insn
) = 1;
24902 /* The stack decrement is too big for an immediate value in a single
24903 insn. In theory we could issue multiple subtracts, but after
24904 three of them it becomes more space efficient to place the full
24905 value in the constant pool and load into a register. (Also the
24906 ARM debugger really likes to see only one stack decrement per
24907 function). So instead we look for a scratch register into which
24908 we can load the decrement, and then we subtract this from the
24909 stack pointer. Unfortunately on the thumb the only available
24910 scratch registers are the argument registers, and we cannot use
24911 these as they may hold arguments to the function. Instead we
24912 attempt to locate a call preserved register which is used by this
24913 function. If we can find one, then we know that it will have
24914 been pushed at the start of the prologue and so we can corrupt
24916 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
24917 if (live_regs_mask
& (1 << regno
))
24920 gcc_assert(regno
<= LAST_LO_REGNUM
);
24922 reg
= gen_rtx_REG (SImode
, regno
);
24924 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
24926 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24927 stack_pointer_rtx
, reg
));
/* Attach the true SP adjustment for the unwinder, since the add used
   a register operand.  */
24929 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
24930 plus_constant (Pmode
, stack_pointer_rtx
,
24932 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
24933 RTX_FRAME_RELATED_P (insn
) = 1;
24937 if (frame_pointer_needed
)
24938 thumb_set_frame_pointer (offsets
);
24940 /* If we are profiling, make sure no instructions are scheduled before
24941 the call to mcount. Similarly if the user has requested no
24942 scheduling in the prolog. Similarly if we want non-call exceptions
24943 using the EABI unwinder, to prevent faulting instructions from being
24944 swapped with a stack adjustment. */
24945 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
24946 || (arm_except_unwind_info (&global_options
) == UI_TARGET
24947 && cfun
->can_throw_non_call_exceptions
))
24948 emit_insn (gen_blockage ());
24950 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
24951 if (live_regs_mask
& 0xff)
24952 cfun
->machine
->lr_save_eliminated
= 0;
/* NOTE(review): mangled extraction -- the return type, braces, the
   declarations of `i`/`num_regs`, the `num_regs == 1` test selecting the
   single-POP path, and the surrounding `if`/`else` structure are missing
   (embedded line numbers jump, e.g. 24974 -> 24978, 24987 -> 24991).
   Visible behavior: count saved registers; for a non-simple return with
   saved registers either emit a single "pop {pc}"-style PARALLEL
   (ret_rtx + post-increment SP load into PC) or rewrite LR -> PC in the
   mask and emit a multi-register pop; otherwise emit simple_return.  */
24955 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
24956 POP instruction can be generated. LR should be replaced by PC. All
24957 the checks required are already done by USE_RETURN_INSN (). Hence,
24958 all we really need to check here is if single register is to be
24959 returned, or multiple register return. */
24961 thumb2_expand_return (bool simple_return
)
24964 unsigned long saved_regs_mask
;
24965 arm_stack_offsets
*offsets
;
24967 offsets
= arm_get_frame_offsets ();
24968 saved_regs_mask
= offsets
->saved_regs_mask
;
24970 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
24971 if (saved_regs_mask
& (1 << i
))
24974 if (!simple_return
&& saved_regs_mask
)
/* Single-register case: pop straight into PC and return.  */
24978 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
24979 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
24980 rtx addr
= gen_rtx_MEM (SImode
,
24981 gen_rtx_POST_INC (SImode
,
24982 stack_pointer_rtx
));
24983 set_mem_alias_set (addr
, get_frame_alias_set ());
24984 XVECEXP (par
, 0, 0) = ret_rtx
;
24985 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
24986 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
24987 emit_jump_insn (par
);
/* Multi-register case: load the saved LR slot into PC instead.  */
24991 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
24992 saved_regs_mask
|= (1 << PC_REGNUM
);
24993 arm_emit_multi_reg_pop (saved_regs_mask
);
24998 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): mangled extraction of the Thumb-1 epilogue expander --
   braces, the early `return` for naked functions, the `if (amount)` guard
   and the immediate-vs-register `else` split are missing (embedded line
   numbers jump, e.g. 25030 -> 25033).  Visible behavior: compute how much
   stack to deallocate (frame-pointer or SP-relative), bump SP either with
   an immediate add or via a scratch load into r3 (LAST_ARG_REGNUM), keep
   the adjustment alive with a force_register_use, optionally emit a
   blockage, and emit clobbers/uses so dataflow sees epilogue restores.  */
25003 thumb1_expand_epilogue (void)
25005 HOST_WIDE_INT amount
;
25006 arm_stack_offsets
*offsets
;
25009 /* Naked functions don't have prologues. */
25010 if (IS_NAKED (arm_current_func_type ()))
25013 offsets
= arm_get_frame_offsets ();
25014 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25016 if (frame_pointer_needed
)
25018 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25019 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25021 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25023 gcc_assert (amount
>= 0);
25026 emit_insn (gen_blockage ());
25029 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25030 GEN_INT (amount
)));
25033 /* r3 is always free in the epilogue. */
25034 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25036 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25037 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25041 /* Emit a USE (stack_pointer_rtx), so that
25042 the stack adjustment will not be deleted. */
25043 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25045 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25046 emit_insn (gen_blockage ());
25048 /* Emit a clobber for each insn that will be restored in the epilogue,
25049 so that flow2 will get register lifetimes correct. */
25050 for (regno
= 0; regno
< 13; regno
++)
25051 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25052 emit_clobber (gen_rtx_REG (SImode
, regno
));
25054 if (! df_regs_ever_live_p (LR_REGNUM
))
25055 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
))
;
/* NOTE(review): mangled extraction of the APCS-frame epilogue expander --
   braces, several declarations (`i`, `num_regs`, `start_reg`, `insn`,
   `floats_from_frame`'s initializing expression's left-hand side, the
   IWMMXT `else if` head, and various `else` arms) are missing, per the
   jumps in the embedded original line numbers.  Recover from upstream.
   Visible behavior: assert APCS frame preconditions; locate the FP save
   area relative to the frame; pop VFP registers in matching groups via
   IP; restore IWMMXT registers FP-relative; swap IP for SP in the saved
   mask (IP holds the old SP); optionally fold LR into PC for a direct
   return; unwind SP below the saved registers and multi-pop them; restore
   IP for interrupt handlers; and emit EH-return/stack-realign fixups
   before the final simple_return jump.  */
25058 /* Epilogue code for APCS frame. */
25060 arm_expand_epilogue_apcs_frame (bool really_return
)
25062 unsigned long func_type
;
25063 unsigned long saved_regs_mask
;
25066 int floats_from_frame
= 0;
25067 arm_stack_offsets
*offsets
;
25069 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25070 func_type
= arm_current_func_type ();
25072 /* Get frame offsets for ARM. */
25073 offsets
= arm_get_frame_offsets ();
25074 saved_regs_mask
= offsets
->saved_regs_mask
;
25076 /* Find the offset of the floating-point save area in the frame. */
25078 = (offsets
->saved_args
25079 + arm_compute_static_chain_stack_bytes ()
25082 /* Compute how many core registers saved and how far away the floats are. */
25083 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25084 if (saved_regs_mask
& (1 << i
))
25087 floats_from_frame
+= 4;
25090 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
25093 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25095 /* The offset is from IP_REGNUM. */
25096 int saved_size
= arm_get_vfp_saved_size ();
25097 if (saved_size
> 0)
25100 floats_from_frame
+= saved_size
;
/* Point IP at the bottom of the VFP save area.  */
25101 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25102 hard_frame_pointer_rtx
,
25103 GEN_INT (-floats_from_frame
)));
25104 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25105 ip_rtx
, hard_frame_pointer_rtx
);
25108 /* Generate VFP register multi-pop. */
25109 start_reg
= FIRST_VFP_REGNUM
;
25111 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25112 /* Look for a case where a reg does not need restoring. */
25113 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25114 && (!df_regs_ever_live_p (i
+ 1)
25115 || call_used_regs
[i
+ 1]))
25117 if (start_reg
!= i
)
25118 arm_emit_vfp_multi_reg_pop (start_reg
,
25119 (i
- start_reg
) / 2,
25120 gen_rtx_REG (SImode
,
25125 /* Restore the remaining regs that we have discovered (or possibly
25126 even all of them, if the conditional in the for loop never
25128 if (start_reg
!= i
)
25129 arm_emit_vfp_multi_reg_pop (start_reg
,
25130 (i
- start_reg
) / 2,
25131 gen_rtx_REG (SImode
, IP_REGNUM
));
25136 /* The frame pointer is guaranteed to be non-double-word aligned, as
25137 it is set to double-word-aligned old_stack_pointer - 4. */
25139 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
25141 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25142 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25144 rtx addr
= gen_frame_mem (V2SImode
,
25145 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25147 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25148 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25149 gen_rtx_REG (V2SImode
, i
),
25155 /* saved_regs_mask should contain IP which contains old stack pointer
25156 at the time of activation creation. Since SP and IP are adjacent registers,
25157 we can restore the value directly into SP. */
25158 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25159 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25160 saved_regs_mask
|= (1 << SP_REGNUM
);
25162 /* There are two registers left in saved_regs_mask - LR and PC. We
25163 only need to restore LR (the return address), but to
25164 save time we can load it directly into PC, unless we need a
25165 special function exit sequence, or we are not really returning. */
25167 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25168 && !crtl
->calls_eh_return
)
25169 /* Delete LR from the register mask, so that LR on
25170 the stack is loaded into the PC in the register mask. */
25171 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25173 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25175 num_regs
= bit_count (saved_regs_mask
);
25176 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25179 emit_insn (gen_blockage ());
25180 /* Unwind the stack to just below the saved registers. */
25181 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25182 hard_frame_pointer_rtx
,
25183 GEN_INT (- 4 * num_regs
)));
25185 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25186 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25189 arm_emit_multi_reg_pop (saved_regs_mask
);
25191 if (IS_INTERRUPT (func_type
))
25193 /* Interrupt handlers will have pushed the
25194 IP onto the stack, so restore it now. */
25196 rtx addr
= gen_rtx_MEM (SImode
,
25197 gen_rtx_POST_INC (SImode
,
25198 stack_pointer_rtx
));
25199 set_mem_alias_set (addr
, get_frame_alias_set ());
25200 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25201 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25202 gen_rtx_REG (SImode
, IP_REGNUM
),
25206 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25209 if (crtl
->calls_eh_return
)
25210 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25212 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25214 if (IS_STACKALIGN (func_type
))
25215 /* Restore the original stack pointer. Before prologue, the stack was
25216 realigned and the original stack pointer saved in r0. For details,
25217 see comment in arm_expand_prologue. */
25218 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25220 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): mangled extraction of the main ARM/Thumb-2 epilogue
   expander -- braces, declarations (`amount`, `insn`, `tmp`, `i`, `j`,
   `num_regs`), several `if`/`else` heads (the TARGET_ARM vs Thumb-2
   frame-pointer split, the `if (TARGET_LDRD ...)` ldrd-pop condition, the
   `if (crtl->args.pretend_args_size ...)` guard) and `return` statements
   are missing, per the jumps in the embedded original line numbers.
   Recover from upstream before compiling.
   Visible behavior: emit a bare simple_return for naked/volatile
   functions; delegate to arm_expand_epilogue_apcs_frame when applicable;
   restore SP from the frame pointer (ARM vs Thumb-2 variants) or pop the
   local frame; multi-pop VFP register groups and IWMMXT registers;
   restore core registers (single post-inc loads, ldrd-based pops, or a
   multi-reg pop, folding LR into PC when a direct return is possible);
   deallocate pretend args with CFA-restore notes; then handle EH-return
   and stack-realignment before the final simple_return jump.  */
25223 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25224 function is not a sibcall. */
25226 arm_expand_epilogue (bool really_return
)
25228 unsigned long func_type
;
25229 unsigned long saved_regs_mask
;
25233 arm_stack_offsets
*offsets
;
25235 func_type
= arm_current_func_type ();
25237 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25238 let output_return_instruction take care of instruction emission if any. */
25239 if (IS_NAKED (func_type
)
25240 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25243 emit_jump_insn (simple_return_rtx
);
25247 /* If we are throwing an exception, then we really must be doing a
25248 return, so we can't tail-call. */
25249 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
25251 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25253 arm_expand_epilogue_apcs_frame (really_return
);
25257 /* Get frame offsets for ARM. */
25258 offsets
= arm_get_frame_offsets ();
25259 saved_regs_mask
= offsets
->saved_regs_mask
;
25260 num_regs
= bit_count (saved_regs_mask
);
25262 if (frame_pointer_needed
)
25265 /* Restore stack pointer if necessary. */
25268 /* In ARM mode, frame pointer points to first saved register.
25269 Restore stack pointer to last saved register. */
25270 amount
= offsets
->frame
- offsets
->saved_regs
;
25272 /* Force out any pending memory operations that reference stacked data
25273 before stack de-allocation occurs. */
25274 emit_insn (gen_blockage ());
25275 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25276 hard_frame_pointer_rtx
,
25277 GEN_INT (amount
)));
25278 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25280 hard_frame_pointer_rtx
);
25282 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25284 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25288 /* In Thumb-2 mode, the frame pointer points to the last saved
25290 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25293 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25294 hard_frame_pointer_rtx
,
25295 GEN_INT (amount
)));
25296 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25297 hard_frame_pointer_rtx
,
25298 hard_frame_pointer_rtx
);
25301 /* Force out any pending memory operations that reference stacked data
25302 before stack de-allocation occurs. */
25303 emit_insn (gen_blockage ());
25304 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25305 hard_frame_pointer_rtx
));
25306 arm_add_cfa_adjust_cfa_note (insn
, 0,
25308 hard_frame_pointer_rtx
);
25309 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25311 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25316 /* Pop off outgoing args and local frame to adjust stack pointer to
25317 last saved register. */
25318 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25322 /* Force out any pending memory operations that reference stacked data
25323 before stack de-allocation occurs. */
25324 emit_insn (gen_blockage ());
25325 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25327 GEN_INT (amount
)));
25328 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25329 stack_pointer_rtx
, stack_pointer_rtx
);
25330 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25332 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25336 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
25338 /* Generate VFP register multi-pop. */
25339 int end_reg
= LAST_VFP_REGNUM
+ 1;
25341 /* Scan the registers in reverse order. We need to match
25342 any groupings made in the prologue and generate matching
25343 vldm operations. The need to match groups is because,
25344 unlike pop, vldm can only do consecutive regs. */
25345 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25346 /* Look for a case where a reg does not need restoring. */
25347 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25348 && (!df_regs_ever_live_p (i
+ 1)
25349 || call_used_regs
[i
+ 1]))
25351 /* Restore the regs discovered so far (from reg+2 to
25353 if (end_reg
> i
+ 2)
25354 arm_emit_vfp_multi_reg_pop (i
+ 2,
25355 (end_reg
- (i
+ 2)) / 2,
25356 stack_pointer_rtx
);
25360 /* Restore the remaining regs that we have discovered (or possibly
25361 even all of them, if the conditional in the for loop never
25363 if (end_reg
> i
+ 2)
25364 arm_emit_vfp_multi_reg_pop (i
+ 2,
25365 (end_reg
- (i
+ 2)) / 2,
25366 stack_pointer_rtx
);
/* Restore IWMMXT registers with post-increment loads off SP.  */
25370 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25371 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25374 rtx addr
= gen_rtx_MEM (V2SImode
,
25375 gen_rtx_POST_INC (SImode
,
25376 stack_pointer_rtx
));
25377 set_mem_alias_set (addr
, get_frame_alias_set ());
25378 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25379 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25380 gen_rtx_REG (V2SImode
, i
),
25382 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25383 stack_pointer_rtx
, stack_pointer_rtx
);
25386 if (saved_regs_mask
)
25389 bool return_in_pc
= false;
25391 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25392 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25393 && !IS_STACKALIGN (func_type
)
25395 && crtl
->args
.pretend_args_size
== 0
25396 && saved_regs_mask
& (1 << LR_REGNUM
)
25397 && !crtl
->calls_eh_return
)
25399 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25400 saved_regs_mask
|= (1 << PC_REGNUM
);
25401 return_in_pc
= true;
25404 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25406 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25407 if (saved_regs_mask
& (1 << i
))
25409 rtx addr
= gen_rtx_MEM (SImode
,
25410 gen_rtx_POST_INC (SImode
,
25411 stack_pointer_rtx
));
25412 set_mem_alias_set (addr
, get_frame_alias_set ());
25414 if (i
== PC_REGNUM
)
25416 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25417 XVECEXP (insn
, 0, 0) = ret_rtx
;
25418 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25420 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25421 insn
= emit_jump_insn (insn
);
25425 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25427 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25428 gen_rtx_REG (SImode
, i
),
25430 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25432 stack_pointer_rtx
);
25439 && current_tune
->prefer_ldrd_strd
25440 && !optimize_function_for_size_p (cfun
))
25443 thumb2_emit_ldrd_pop (saved_regs_mask
);
25444 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25445 arm_emit_ldrd_pop (saved_regs_mask
);
25447 arm_emit_multi_reg_pop (saved_regs_mask
);
25450 arm_emit_multi_reg_pop (saved_regs_mask
);
/* Deallocate pretend args (plus static chain space) and attach
   CFA-restore notes for unwinding.  */
25458 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25462 rtx dwarf
= NULL_RTX
;
25464 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25466 GEN_INT (amount
)));
25468 RTX_FRAME_RELATED_P (tmp
) = 1;
25470 if (cfun
->machine
->uses_anonymous_args
)
25472 /* Restore pretend args. Refer arm_expand_prologue on how to save
25473 pretend_args in stack. */
25474 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25475 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25476 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25477 if (saved_regs_mask
& (1 << i
))
25479 rtx reg
= gen_rtx_REG (SImode
, i
);
25480 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25483 REG_NOTES (tmp
) = dwarf
;
25485 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25486 stack_pointer_rtx
, stack_pointer_rtx
);
25489 if (!really_return
)
25492 if (crtl
->calls_eh_return
)
25493 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25495 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25497 if (IS_STACKALIGN (func_type
))
25498 /* Restore the original stack pointer. Before prologue, the stack was
25499 realigned and the original stack pointer saved in r0. For details,
25500 see comment in arm_expand_prologue. */
25501 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25503 emit_jump_insn (simple_return_rtx
);
/* NOTE(review): mangled extraction -- the return type, `const char *name`
   declaration, the SYMBOL_REF comparand of the second assert, braces and
   the final `return ""` are missing (embedded line numbers jump 25518 ->
   25520, 25542 -> 25547).  Visible behavior: emit the ARM-mode entry stub
   (orr IP, PC, #1; bx IP) that switches into Thumb, then emit the
   `.real_start_of` label so gas and the debugger can find the Thumb
   entry point; DLL-exported names are stripped first.  */
25506 /* Implementation of insn prologue_thumb1_interwork. This is the first
25507 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25510 thumb1_output_interwork (void)
25513 FILE *f
= asm_out_file
;
25515 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25516 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25518 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25520 /* Generate code sequence to switch us into Thumb mode. */
25521 /* The .code 32 directive has already been emitted by
25522 ASM_DECLARE_FUNCTION_NAME. */
25523 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25524 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25526 /* Generate a label, so that the debugger will notice the
25527 change in instruction sets. This label is also used by
25528 the assembler to bypass the ARM code when this function
25529 is called from a Thumb encoded function elsewhere in the
25530 same file. Hence the definition of STUB_NAME here must
25531 agree with the definition in gas/config/tc-arm.c. */
25533 #define STUB_NAME ".real_start_of"
25535 fprintf (f
, "\t.code\t16\n");
25537 if (arm_dllexport_name_p (name
))
25538 name
= arm_strip_name_encoding (name
);
25540 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25541 fprintf (f
, "\t.thumb_func\n");
25542 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
/* NOTE(review): mangled extraction -- the return type, the declarations of
   `addr`, `base`, `offset`, `arg1`, `arg2`, the `case REG:` / `case
   CONST:` / `case PLUS:` / `case LABEL_REF:` heads of the switch,
   `break`s, `else` keywords and the final `return ""` are missing
   (embedded line numbers jump, e.g. 25566 -> 25569, 25579 -> 25584).
   Visible behavior: emit a two-`ldr` sequence for a DImode load into a
   low-register pair, ordering the loads (and, for reg+reg addresses,
   materializing the address into the high destination register first) so
   the base register is not clobbered before it is used.  */
25547 /* Handle the case of a double word load into a low register from
25548 a computed memory address. The computed address may involve a
25549 register which is overwritten by the load. */
25551 thumb_load_double_from_address (rtx
*operands
)
25559 gcc_assert (REG_P (operands
[0]));
25560 gcc_assert (MEM_P (operands
[1]));
25562 /* Get the memory address. */
25563 addr
= XEXP (operands
[1], 0);
25565 /* Work out how the memory address is computed. */
25566 switch (GET_CODE (addr
))
/* Plain register address: order the loads so the base survives.  */
25569 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25571 if (REGNO (operands
[0]) == REGNO (addr
))
25573 output_asm_insn ("ldr\t%H0, %2", operands
);
25574 output_asm_insn ("ldr\t%0, %1", operands
);
25578 output_asm_insn ("ldr\t%0, %1", operands
);
25579 output_asm_insn ("ldr\t%H0, %2", operands
);
25584 /* Compute <address> + 4 for the high order load. */
25585 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25587 output_asm_insn ("ldr\t%0, %1", operands
);
25588 output_asm_insn ("ldr\t%H0, %2", operands
);
/* PLUS address: split into base register and constant/register offset.  */
25592 arg1
= XEXP (addr
, 0);
25593 arg2
= XEXP (addr
, 1);
25595 if (CONSTANT_P (arg1
))
25596 base
= arg2
, offset
= arg1
;
25598 base
= arg1
, offset
= arg2
;
25600 gcc_assert (REG_P (base
));
25602 /* Catch the case of <address> = <reg> + <reg> */
25603 if (REG_P (offset
))
25605 int reg_offset
= REGNO (offset
);
25606 int reg_base
= REGNO (base
);
25607 int reg_dest
= REGNO (operands
[0]);
25609 /* Add the base and offset registers together into the
25610 higher destination register. */
25611 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25612 reg_dest
+ 1, reg_base
, reg_offset
);
25614 /* Load the lower destination register from the address in
25615 the higher destination register. */
25616 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25617 reg_dest
, reg_dest
+ 1);
25619 /* Load the higher destination register from its own address
25621 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25622 reg_dest
+ 1, reg_dest
+ 1);
25626 /* Compute <address> + 4 for the high order load. */
25627 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25629 /* If the computed address is held in the low order register
25630 then load the high order register first, otherwise always
25631 load the low order register first. */
25632 if (REGNO (operands
[0]) == REGNO (base
))
25634 output_asm_insn ("ldr\t%H0, %2", operands
);
25635 output_asm_insn ("ldr\t%0, %1", operands
);
25639 output_asm_insn ("ldr\t%0, %1", operands
);
25640 output_asm_insn ("ldr\t%H0, %2", operands
);
25646 /* With no registers to worry about we can just load the value
25648 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25650 output_asm_insn ("ldr\t%H0, %2", operands
);
25651 output_asm_insn ("ldr\t%0, %1", operands
);
25655 gcc_unreachable ();
/* NOTE(review): mangled extraction -- the return type, the `switch (n)`
   head with `case 2:` / `case 3:` labels, `break`s, braces and the final
   `return ""` are missing (embedded line numbers jump 25671 -> 25675,
   25683 -> 25687).  Visible behavior: emit ldmia/stmia pairs copying two
   or three words, first sorting operands[4..6] into ascending register
   order (a three-element bubble sort for N == 3), since ldm/stm register
   lists must be ascending.  */
25662 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
25667 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25668 std::swap (operands
[4], operands
[5]);
25670 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
25671 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
25675 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25676 std::swap (operands
[4], operands
[5]);
25677 if (REGNO (operands
[5]) > REGNO (operands
[6]))
25678 std::swap (operands
[5], operands
[6]);
25679 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25680 std::swap (operands
[4], operands
[5]);
25682 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
25683 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
25687 gcc_unreachable ();
25693 /* Output a call-via instruction for thumb state. */
25695 thumb_call_via_reg (rtx reg
)
25697 int regno
= REGNO (reg
);
25700 gcc_assert (regno
< LR_REGNUM
);
25702 /* If we are in the normal text section we can use a single instance
25703 per compilation unit. If we are doing function sections, then we need
25704 an entry per section, since we can't rely on reachability. */
25705 if (in_section
== text_section
)
25707 thumb_call_reg_needed
= 1;
25709 if (thumb_call_via_label
[regno
] == NULL
)
25710 thumb_call_via_label
[regno
] = gen_label_rtx ();
25711 labelp
= thumb_call_via_label
+ regno
;
25715 if (cfun
->machine
->call_via
[regno
] == NULL
)
25716 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
25717 labelp
= cfun
->machine
->call_via
+ regno
;
25720 output_asm_insn ("bl\t%a0", labelp
);
25724 /* Routines for generating rtl. */
25726 thumb_expand_movmemqi (rtx
*operands
)
25728 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
25729 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
25730 HOST_WIDE_INT len
= INTVAL (operands
[2]);
25731 HOST_WIDE_INT offset
= 0;
25735 emit_insn (gen_movmem12b (out
, in
, out
, in
));
25741 emit_insn (gen_movmem8b (out
, in
, out
, in
));
25747 rtx reg
= gen_reg_rtx (SImode
);
25748 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
25749 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
25756 rtx reg
= gen_reg_rtx (HImode
);
25757 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
25758 plus_constant (Pmode
, in
,
25760 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
25769 rtx reg
= gen_reg_rtx (QImode
);
25770 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
25771 plus_constant (Pmode
, in
,
25773 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
25780 thumb_reload_out_hi (rtx
*operands
)
25782 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
25785 /* Handle reading a half-word from memory during reload. */
25787 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
25789 gcc_unreachable ();
25792 /* Return the length of a function name prefix
25793 that starts with the character 'c'. */
25795 arm_get_strip_length (int c
)
25799 ARM_NAME_ENCODING_LENGTHS
25804 /* Return a pointer to a function's name with any
25805 and all prefix encodings stripped from it. */
25807 arm_strip_name_encoding (const char *name
)
25811 while ((skip
= arm_get_strip_length (* name
)))
25817 /* If there is a '*' anywhere in the name's prefix, then
25818 emit the stripped name verbatim, otherwise prepend an
25819 underscore if leading underscores are being used. */
25821 arm_asm_output_labelref (FILE *stream
, const char *name
)
25826 while ((skip
= arm_get_strip_length (* name
)))
25828 verbatim
|= (*name
== '*');
25833 fputs (name
, stream
);
25835 asm_fprintf (stream
, "%U%s", name
);
25838 /* This function is used to emit an EABI tag and its associated value.
25839 We emit the numerical value of the tag in case the assembler does not
25840 support textual tags. (Eg gas prior to 2.20). If requested we include
25841 the tag name in a comment so that anyone reading the assembler output
25842 will know which tag is being set.
25844 This function is not static because arm-c.c needs it too. */
25847 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
25849 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
25850 if (flag_verbose_asm
|| flag_debug_asm
)
25851 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
25852 asm_fprintf (asm_out_file
, "\n");
25855 /* This function is used to print CPU tuning information as comment
25856 in assembler file. Pointers are not printed for now. */
25859 arm_print_tune_info (void)
25861 asm_fprintf (asm_out_file
, "\t@.tune parameters\n");
25862 asm_fprintf (asm_out_file
, "\t\t@constant_limit:\t%d\n",
25863 current_tune
->constant_limit
);
25864 asm_fprintf (asm_out_file
, "\t\t@max_insns_skipped:\t%d\n",
25865 current_tune
->max_insns_skipped
);
25866 asm_fprintf (asm_out_file
, "\t\t@prefetch.num_slots:\t%d\n",
25867 current_tune
->prefetch
.num_slots
);
25868 asm_fprintf (asm_out_file
, "\t\t@prefetch.l1_cache_size:\t%d\n",
25869 current_tune
->prefetch
.l1_cache_size
);
25870 asm_fprintf (asm_out_file
, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25871 current_tune
->prefetch
.l1_cache_line_size
);
25872 asm_fprintf (asm_out_file
, "\t\t@prefer_constant_pool:\t%d\n",
25873 (int) current_tune
->prefer_constant_pool
);
25874 asm_fprintf (asm_out_file
, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25875 asm_fprintf (asm_out_file
, "\t\t\t\ts&p\tcost\n");
25876 asm_fprintf (asm_out_file
, "\t\t\t\t00\t%d\n",
25877 current_tune
->branch_cost (false, false));
25878 asm_fprintf (asm_out_file
, "\t\t\t\t01\t%d\n",
25879 current_tune
->branch_cost (false, true));
25880 asm_fprintf (asm_out_file
, "\t\t\t\t10\t%d\n",
25881 current_tune
->branch_cost (true, false));
25882 asm_fprintf (asm_out_file
, "\t\t\t\t11\t%d\n",
25883 current_tune
->branch_cost (true, true));
25884 asm_fprintf (asm_out_file
, "\t\t@prefer_ldrd_strd:\t%d\n",
25885 (int) current_tune
->prefer_ldrd_strd
);
25886 asm_fprintf (asm_out_file
, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25887 (int) current_tune
->logical_op_non_short_circuit_thumb
,
25888 (int) current_tune
->logical_op_non_short_circuit_arm
);
25889 asm_fprintf (asm_out_file
, "\t\t@prefer_neon_for_64bits:\t%d\n",
25890 (int) current_tune
->prefer_neon_for_64bits
);
25891 asm_fprintf (asm_out_file
,
25892 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25893 (int) current_tune
->disparage_flag_setting_t16_encodings
);
25894 asm_fprintf (asm_out_file
, "\t\t@string_ops_prefer_neon:\t%d\n",
25895 (int) current_tune
->string_ops_prefer_neon
);
25896 asm_fprintf (asm_out_file
, "\t\t@max_insns_inline_memset:\t%d\n",
25897 current_tune
->max_insns_inline_memset
);
25898 asm_fprintf (asm_out_file
, "\t\t@fusible_ops:\t%u\n",
25899 current_tune
->fusible_ops
);
25900 asm_fprintf (asm_out_file
, "\t\t@sched_autopref:\t%d\n",
25901 (int) current_tune
->sched_autopref
);
25905 arm_file_start (void)
25911 const char *fpu_name
;
25912 if (arm_selected_arch
)
25914 /* armv7ve doesn't support any extensions. */
25915 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
25917 /* Keep backward compatability for assemblers
25918 which don't support armv7ve. */
25919 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
25920 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
25921 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
25922 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
25923 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
25927 const char* pos
= strchr (arm_selected_arch
->name
, '+');
25931 gcc_assert (strlen (arm_selected_arch
->name
)
25932 <= sizeof (buf
) / sizeof (*pos
));
25933 strncpy (buf
, arm_selected_arch
->name
,
25934 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
25935 buf
[pos
- arm_selected_arch
->name
] = '\0';
25936 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
25937 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
25940 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
25943 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
25944 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
25947 const char* truncated_name
25948 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
25949 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
25952 if (print_tune_info
)
25953 arm_print_tune_info ();
25955 if (TARGET_SOFT_FLOAT
)
25957 fpu_name
= "softvfp";
25961 fpu_name
= arm_fpu_desc
->name
;
25962 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
25964 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
25965 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25967 if (TARGET_HARD_FLOAT_ABI
)
25968 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25971 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
25973 /* Some of these attributes only apply when the corresponding features
25974 are used. However we don't have any easy way of figuring this out.
25975 Conservatively record the setting that would have been used. */
25977 if (flag_rounding_math
)
25978 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25980 if (!flag_unsafe_math_optimizations
)
25982 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25983 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25985 if (flag_signaling_nans
)
25986 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25988 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25989 flag_finite_math_only
? 1 : 3);
25991 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25992 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25993 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25994 flag_short_enums
? 1 : 2);
25996 /* Tag_ABI_optimization_goals. */
25999 else if (optimize
>= 2)
26005 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26007 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26010 if (arm_fp16_format
)
26011 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26012 (int) arm_fp16_format
);
26014 if (arm_lang_output_object_attributes_hook
)
26015 arm_lang_output_object_attributes_hook();
26018 default_file_start ();
26022 arm_file_end (void)
26026 if (NEED_INDICATE_EXEC_STACK
)
26027 /* Add .note.GNU-stack. */
26028 file_end_indicate_exec_stack ();
26030 if (! thumb_call_reg_needed
)
26033 switch_to_section (text_section
);
26034 asm_fprintf (asm_out_file
, "\t.code 16\n");
26035 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26037 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26039 rtx label
= thumb_call_via_label
[regno
];
26043 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26044 CODE_LABEL_NUMBER (label
));
26045 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
26051 /* Symbols in the text segment can be accessed without indirecting via the
26052 constant pool; it may take an extra binary operation, but this is still
26053 faster than indirecting via memory. Don't do this when not optimizing,
26054 since we won't be calculating al of the offsets necessary to do this
26058 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
26060 if (optimize
> 0 && TREE_CONSTANT (decl
))
26061 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26063 default_encode_section_info (decl
, rtl
, first
);
26065 #endif /* !ARM_PE */
26068 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26070 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26071 && !strcmp (prefix
, "L"))
26073 arm_ccfsm_state
= 0;
26074 arm_target_insn
= NULL
;
26076 default_internal_label (stream
, prefix
, labelno
);
26079 /* Output code to add DELTA to the first argument, and then jump
26080 to FUNCTION. Used for C++ multiple inheritance. */
26082 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
26083 HOST_WIDE_INT delta
,
26084 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
26087 static int thunk_label
= 0;
26090 int mi_delta
= delta
;
26091 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26093 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26096 mi_delta
= - mi_delta
;
26098 final_start_function (emit_barrier (), file
, 1);
26102 int labelno
= thunk_label
++;
26103 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26104 /* Thunks are entered in arm mode when avaiable. */
26105 if (TARGET_THUMB1_ONLY
)
26107 /* push r3 so we can use it as a temporary. */
26108 /* TODO: Omit this save if r3 is not used. */
26109 fputs ("\tpush {r3}\n", file
);
26110 fputs ("\tldr\tr3, ", file
);
26114 fputs ("\tldr\tr12, ", file
);
26116 assemble_name (file
, label
);
26117 fputc ('\n', file
);
26120 /* If we are generating PIC, the ldr instruction below loads
26121 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26122 the address of the add + 8, so we have:
26124 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26127 Note that we have "+ 1" because some versions of GNU ld
26128 don't set the low bit of the result for R_ARM_REL32
26129 relocations against thumb function symbols.
26130 On ARMv6M this is +4, not +8. */
26131 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26132 assemble_name (file
, labelpc
);
26133 fputs (":\n", file
);
26134 if (TARGET_THUMB1_ONLY
)
26136 /* This is 2 insns after the start of the thunk, so we know it
26137 is 4-byte aligned. */
26138 fputs ("\tadd\tr3, pc, r3\n", file
);
26139 fputs ("\tmov r12, r3\n", file
);
26142 fputs ("\tadd\tr12, pc, r12\n", file
);
26144 else if (TARGET_THUMB1_ONLY
)
26145 fputs ("\tmov r12, r3\n", file
);
26147 if (TARGET_THUMB1_ONLY
)
26149 if (mi_delta
> 255)
26151 fputs ("\tldr\tr3, ", file
);
26152 assemble_name (file
, label
);
26153 fputs ("+4\n", file
);
26154 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26155 mi_op
, this_regno
, this_regno
);
26157 else if (mi_delta
!= 0)
26159 /* Thumb1 unified syntax requires s suffix in instruction name when
26160 one of the operands is immediate. */
26161 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26162 mi_op
, this_regno
, this_regno
,
26168 /* TODO: Use movw/movt for large constants when available. */
26169 while (mi_delta
!= 0)
26171 if ((mi_delta
& (3 << shift
)) == 0)
26175 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26176 mi_op
, this_regno
, this_regno
,
26177 mi_delta
& (0xff << shift
));
26178 mi_delta
&= ~(0xff << shift
);
26185 if (TARGET_THUMB1_ONLY
)
26186 fputs ("\tpop\t{r3}\n", file
);
26188 fprintf (file
, "\tbx\tr12\n");
26189 ASM_OUTPUT_ALIGN (file
, 2);
26190 assemble_name (file
, label
);
26191 fputs (":\n", file
);
26194 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26195 rtx tem
= XEXP (DECL_RTL (function
), 0);
26196 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26197 pipeline offset is four rather than eight. Adjust the offset
26199 tem
= plus_constant (GET_MODE (tem
), tem
,
26200 TARGET_THUMB1_ONLY
? -3 : -7);
26201 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26203 gen_rtx_SYMBOL_REF (Pmode
,
26204 ggc_strdup (labelpc
)));
26205 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26208 /* Output ".word .LTHUNKn". */
26209 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26211 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26212 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26216 fputs ("\tb\t", file
);
26217 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26218 if (NEED_PLT_RELOC
)
26219 fputs ("(PLT)", file
);
26220 fputc ('\n', file
);
26223 final_end_function ();
26227 arm_emit_vector_const (FILE *file
, rtx x
)
26230 const char * pattern
;
26232 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26234 switch (GET_MODE (x
))
26236 case V2SImode
: pattern
= "%08x"; break;
26237 case V4HImode
: pattern
= "%04x"; break;
26238 case V8QImode
: pattern
= "%02x"; break;
26239 default: gcc_unreachable ();
26242 fprintf (file
, "0x");
26243 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26247 element
= CONST_VECTOR_ELT (x
, i
);
26248 fprintf (file
, pattern
, INTVAL (element
));
26254 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26255 HFmode constant pool entries are actually loaded with ldr. */
26257 arm_emit_fp16_const (rtx c
)
26261 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
26262 if (WORDS_BIG_ENDIAN
)
26263 assemble_zeros (2);
26264 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
26265 if (!WORDS_BIG_ENDIAN
)
26266 assemble_zeros (2);
26270 arm_output_load_gr (rtx
*operands
)
26277 if (!MEM_P (operands
[1])
26278 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26279 || !REG_P (reg
= XEXP (sum
, 0))
26280 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26281 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26282 return "wldrw%?\t%0, %1";
26284 /* Fix up an out-of-range load of a GR register. */
26285 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26286 wcgr
= operands
[0];
26288 output_asm_insn ("ldr%?\t%0, %1", operands
);
26290 operands
[0] = wcgr
;
26292 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26293 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26298 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26300 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26301 named arg and all anonymous args onto the stack.
26302 XXX I know the prologue shouldn't be pushing registers, but it is faster
26306 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26310 int second_time ATTRIBUTE_UNUSED
)
26312 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26315 cfun
->machine
->uses_anonymous_args
= 1;
26316 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26318 nregs
= pcum
->aapcs_ncrn
;
26319 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
26323 nregs
= pcum
->nregs
;
26325 if (nregs
< NUM_ARG_REGS
)
26326 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26329 /* We can't rely on the caller doing the proper promotion when
26330 using APCS or ATPCS. */
26333 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26335 return !TARGET_AAPCS_BASED
;
26338 static machine_mode
26339 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26341 int *punsignedp ATTRIBUTE_UNUSED
,
26342 const_tree fntype ATTRIBUTE_UNUSED
,
26343 int for_return ATTRIBUTE_UNUSED
)
26345 if (GET_MODE_CLASS (mode
) == MODE_INT
26346 && GET_MODE_SIZE (mode
) < 4)
26352 /* AAPCS based ABIs use short enums by default. */
26355 arm_default_short_enums (void)
26357 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
26361 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26364 arm_align_anon_bitfield (void)
26366 return TARGET_AAPCS_BASED
;
26370 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26373 arm_cxx_guard_type (void)
26375 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26379 /* The EABI says test the least significant bit of a guard variable. */
26382 arm_cxx_guard_mask_bit (void)
26384 return TARGET_AAPCS_BASED
;
26388 /* The EABI specifies that all array cookies are 8 bytes long. */
26391 arm_get_cookie_size (tree type
)
26395 if (!TARGET_AAPCS_BASED
)
26396 return default_cxx_get_cookie_size (type
);
26398 size
= build_int_cst (sizetype
, 8);
26403 /* The EABI says that array cookies should also contain the element size. */
26406 arm_cookie_has_size (void)
26408 return TARGET_AAPCS_BASED
;
26412 /* The EABI says constructors and destructors should return a pointer to
26413 the object constructed/destroyed. */
26416 arm_cxx_cdtor_returns_this (void)
26418 return TARGET_AAPCS_BASED
;
26421 /* The EABI says that an inline function may never be the key
26425 arm_cxx_key_method_may_be_inline (void)
26427 return !TARGET_AAPCS_BASED
;
26431 arm_cxx_determine_class_data_visibility (tree decl
)
26433 if (!TARGET_AAPCS_BASED
26434 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26437 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26438 is exported. However, on systems without dynamic vague linkage,
26439 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26440 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26441 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26443 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26444 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26448 arm_cxx_class_data_always_comdat (void)
26450 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26451 vague linkage if the class has no key function. */
26452 return !TARGET_AAPCS_BASED
;
26456 /* The EABI says __aeabi_atexit should be used to register static
26460 arm_cxx_use_aeabi_atexit (void)
26462 return TARGET_AAPCS_BASED
;
26467 arm_set_return_address (rtx source
, rtx scratch
)
26469 arm_stack_offsets
*offsets
;
26470 HOST_WIDE_INT delta
;
26472 unsigned long saved_regs
;
26474 offsets
= arm_get_frame_offsets ();
26475 saved_regs
= offsets
->saved_regs_mask
;
26477 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26478 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26481 if (frame_pointer_needed
)
26482 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26485 /* LR will be the first saved register. */
26486 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26491 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26492 GEN_INT (delta
& ~4095)));
26497 addr
= stack_pointer_rtx
;
26499 addr
= plus_constant (Pmode
, addr
, delta
);
26501 /* The store needs to be marked as frame related in order to prevent
26502 DSE from deleting it as dead if it is based on fp. */
26503 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26504 RTX_FRAME_RELATED_P (insn
) = 1;
26505 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26511 thumb_set_return_address (rtx source
, rtx scratch
)
26513 arm_stack_offsets
*offsets
;
26514 HOST_WIDE_INT delta
;
26515 HOST_WIDE_INT limit
;
26518 unsigned long mask
;
26522 offsets
= arm_get_frame_offsets ();
26523 mask
= offsets
->saved_regs_mask
;
26524 if (mask
& (1 << LR_REGNUM
))
26527 /* Find the saved regs. */
26528 if (frame_pointer_needed
)
26530 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26531 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26537 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26540 /* Allow for the stack frame. */
26541 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26543 /* The link register is always the first saved register. */
26546 /* Construct the address. */
26547 addr
= gen_rtx_REG (SImode
, reg
);
26550 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26551 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26555 addr
= plus_constant (Pmode
, addr
, delta
);
26557 /* The store needs to be marked as frame related in order to prevent
26558 DSE from deleting it as dead if it is based on fp. */
26559 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26560 RTX_FRAME_RELATED_P (insn
) = 1;
26561 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26564 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26567 /* Implements target hook vector_mode_supported_p. */
26569 arm_vector_mode_supported_p (machine_mode mode
)
26571 /* Neon also supports V2SImode, etc. listed in the clause below. */
26572 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26573 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
26574 || mode
== V2DImode
|| mode
== V8HFmode
))
26577 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26578 && ((mode
== V2SImode
)
26579 || (mode
== V4HImode
)
26580 || (mode
== V8QImode
)))
26583 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26584 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26585 || mode
== V2HAmode
))
26591 /* Implements target hook array_mode_supported_p. */
26594 arm_array_mode_supported_p (machine_mode mode
,
26595 unsigned HOST_WIDE_INT nelems
)
26598 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26599 && (nelems
>= 2 && nelems
<= 4))
26605 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26606 registers when autovectorizing for Neon, at least until multiple vector
26607 widths are supported properly by the middle-end. */
26609 static machine_mode
26610 arm_preferred_simd_mode (machine_mode mode
)
26616 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26618 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26620 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26622 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26624 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26631 if (TARGET_REALLY_IWMMXT
)
26647 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26649 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26650 using r0-r4 for function arguments, r7 for the stack frame and don't have
26651 enough left over to do doubleword arithmetic. For Thumb-2 all the
26652 potentially problematic instructions accept high registers so this is not
26653 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26654 that require many low registers. */
26656 arm_class_likely_spilled_p (reg_class_t rclass
)
26658 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
26659 || rclass
== CC_REG
)
26665 /* Implements target hook small_register_classes_for_mode_p. */
26667 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
26669 return TARGET_THUMB1
;
26672 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26673 ARM insns and therefore guarantee that the shift count is modulo 256.
26674 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26675 guarantee no particular behavior for out-of-range counts. */
26677 static unsigned HOST_WIDE_INT
26678 arm_shift_truncation_mask (machine_mode mode
)
26680 return mode
== SImode
? 255 : 0;
26684 /* Map internal gcc register numbers to DWARF2 register numbers. */
26687 arm_dbx_register_number (unsigned int regno
)
26692 if (IS_VFP_REGNUM (regno
))
26694 /* See comment in arm_dwarf_register_span. */
26695 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26696 return 64 + regno
- FIRST_VFP_REGNUM
;
26698 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
26701 if (IS_IWMMXT_GR_REGNUM (regno
))
26702 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
26704 if (IS_IWMMXT_REGNUM (regno
))
26705 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
26707 return DWARF_FRAME_REGISTERS
;
26710 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26711 GCC models tham as 64 32-bit registers, so we need to describe this to
26712 the DWARF generation code. Other registers can use the default. */
26714 arm_dwarf_register_span (rtx rtl
)
26722 regno
= REGNO (rtl
);
26723 if (!IS_VFP_REGNUM (regno
))
26726 /* XXX FIXME: The EABI defines two VFP register ranges:
26727 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26729 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26730 corresponding D register. Until GDB supports this, we shall use the
26731 legacy encodings. We also use these encodings for D0-D15 for
26732 compatibility with older debuggers. */
26733 mode
= GET_MODE (rtl
);
26734 if (GET_MODE_SIZE (mode
) < 8)
26737 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26739 nregs
= GET_MODE_SIZE (mode
) / 4;
26740 for (i
= 0; i
< nregs
; i
+= 2)
26741 if (TARGET_BIG_END
)
26743 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26744 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
26748 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
26749 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
26754 nregs
= GET_MODE_SIZE (mode
) / 8;
26755 for (i
= 0; i
< nregs
; i
++)
26756 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
26759 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
26762 #if ARM_UNWIND_INFO
26763 /* Emit unwind directives for a store-multiple instruction or stack pointer
26764 push during alignment.
26765 These should only ever be generated by the function prologue code, so
26766 expect them to have a particular form.
26767 The store-multiple instruction sometimes pushes pc as the last register,
26768 although it should not be tracked into unwind information, or for -Os
26769 sometimes pushes some dummy registers before first register that needs
26770 to be tracked in unwind information; such dummy registers are there just
26771 to avoid separate stack adjustment, and will not be restored in the
26775 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
26778 HOST_WIDE_INT offset
;
26779 HOST_WIDE_INT nregs
;
26783 unsigned padfirst
= 0, padlast
= 0;
26786 e
= XVECEXP (p
, 0, 0);
26787 gcc_assert (GET_CODE (e
) == SET
);
26789 /* First insn will adjust the stack pointer. */
26790 gcc_assert (GET_CODE (e
) == SET
26791 && REG_P (SET_DEST (e
))
26792 && REGNO (SET_DEST (e
)) == SP_REGNUM
26793 && GET_CODE (SET_SRC (e
)) == PLUS
);
26795 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
26796 nregs
= XVECLEN (p
, 0) - 1;
26797 gcc_assert (nregs
);
26799 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
26802 /* For -Os dummy registers can be pushed at the beginning to
26803 avoid separate stack pointer adjustment. */
26804 e
= XVECEXP (p
, 0, 1);
26805 e
= XEXP (SET_DEST (e
), 0);
26806 if (GET_CODE (e
) == PLUS
)
26807 padfirst
= INTVAL (XEXP (e
, 1));
26808 gcc_assert (padfirst
== 0 || optimize_size
);
26809 /* The function prologue may also push pc, but not annotate it as it is
26810 never restored. We turn this into a stack pointer adjustment. */
26811 e
= XVECEXP (p
, 0, nregs
);
26812 e
= XEXP (SET_DEST (e
), 0);
26813 if (GET_CODE (e
) == PLUS
)
26814 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
26816 padlast
= offset
- 4;
26817 gcc_assert (padlast
== 0 || padlast
== 4);
26819 fprintf (asm_out_file
, "\t.pad #4\n");
26821 fprintf (asm_out_file
, "\t.save {");
26823 else if (IS_VFP_REGNUM (reg
))
26826 fprintf (asm_out_file
, "\t.vsave {");
26829 /* Unknown register type. */
26830 gcc_unreachable ();
26832 /* If the stack increment doesn't match the size of the saved registers,
26833 something has gone horribly wrong. */
26834 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
26838 /* The remaining insns will describe the stores. */
26839 for (i
= 1; i
<= nregs
; i
++)
26841 /* Expect (set (mem <addr>) (reg)).
26842 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26843 e
= XVECEXP (p
, 0, i
);
26844 gcc_assert (GET_CODE (e
) == SET
26845 && MEM_P (SET_DEST (e
))
26846 && REG_P (SET_SRC (e
)));
26848 reg
= REGNO (SET_SRC (e
));
26849 gcc_assert (reg
>= lastreg
);
26852 fprintf (asm_out_file
, ", ");
26853 /* We can't use %r for vfp because we need to use the
26854 double precision register names. */
26855 if (IS_VFP_REGNUM (reg
))
26856 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
26858 asm_fprintf (asm_out_file
, "%r", reg
);
26860 #ifdef ENABLE_CHECKING
26861 /* Check that the addresses are consecutive. */
26862 e
= XEXP (SET_DEST (e
), 0);
26863 if (GET_CODE (e
) == PLUS
)
26864 gcc_assert (REG_P (XEXP (e
, 0))
26865 && REGNO (XEXP (e
, 0)) == SP_REGNUM
26866 && CONST_INT_P (XEXP (e
, 1))
26867 && offset
== INTVAL (XEXP (e
, 1)));
26871 && REGNO (e
) == SP_REGNUM
);
26872 offset
+= reg_size
;
26875 fprintf (asm_out_file
, "}\n");
26877 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
26880 /* Emit unwind directives for a SET. */
26883 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
26891 switch (GET_CODE (e0
))
26894 /* Pushing a single register. */
26895 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
26896 || !REG_P (XEXP (XEXP (e0
, 0), 0))
26897 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
26900 asm_fprintf (asm_out_file
, "\t.save ");
26901 if (IS_VFP_REGNUM (REGNO (e1
)))
26902 asm_fprintf(asm_out_file
, "{d%d}\n",
26903 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
26905 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
26909 if (REGNO (e0
) == SP_REGNUM
)
26911 /* A stack increment. */
26912 if (GET_CODE (e1
) != PLUS
26913 || !REG_P (XEXP (e1
, 0))
26914 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
26915 || !CONST_INT_P (XEXP (e1
, 1)))
26918 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
26919 -INTVAL (XEXP (e1
, 1)));
26921 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
26923 HOST_WIDE_INT offset
;
26925 if (GET_CODE (e1
) == PLUS
)
26927 if (!REG_P (XEXP (e1
, 0))
26928 || !CONST_INT_P (XEXP (e1
, 1)))
26930 reg
= REGNO (XEXP (e1
, 0));
26931 offset
= INTVAL (XEXP (e1
, 1));
26932 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
26933 HARD_FRAME_POINTER_REGNUM
, reg
,
26936 else if (REG_P (e1
))
26939 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
26940 HARD_FRAME_POINTER_REGNUM
, reg
);
26945 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
26947 /* Move from sp to reg. */
26948 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
26950 else if (GET_CODE (e1
) == PLUS
26951 && REG_P (XEXP (e1
, 0))
26952 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
26953 && CONST_INT_P (XEXP (e1
, 1)))
26955 /* Set reg to offset from sp. */
26956 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
26957 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
26969 /* Emit unwind directives for the given insn. */
26972 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
26975 bool handled_one
= false;
26977 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
26980 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
26981 && (TREE_NOTHROW (current_function_decl
)
26982 || crtl
->all_throwers_are_sibcalls
))
26985 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
26988 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
26990 switch (REG_NOTE_KIND (note
))
26992 case REG_FRAME_RELATED_EXPR
:
26993 pat
= XEXP (note
, 0);
26996 case REG_CFA_REGISTER
:
26997 pat
= XEXP (note
, 0);
27000 pat
= PATTERN (insn
);
27001 if (GET_CODE (pat
) == PARALLEL
)
27002 pat
= XVECEXP (pat
, 0, 0);
27005 /* Only emitted for IS_STACKALIGN re-alignment. */
27010 src
= SET_SRC (pat
);
27011 dest
= SET_DEST (pat
);
27013 gcc_assert (src
== stack_pointer_rtx
);
27014 reg
= REGNO (dest
);
27015 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27018 handled_one
= true;
27021 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27022 to get correct dwarf information for shrink-wrap. We should not
27023 emit unwind information for it because these are used either for
27024 pretend arguments or notes to adjust sp and restore registers from
27026 case REG_CFA_DEF_CFA
:
27027 case REG_CFA_ADJUST_CFA
:
27028 case REG_CFA_RESTORE
:
27031 case REG_CFA_EXPRESSION
:
27032 case REG_CFA_OFFSET
:
27033 /* ??? Only handling here what we actually emit. */
27034 gcc_unreachable ();
27042 pat
= PATTERN (insn
);
27045 switch (GET_CODE (pat
))
27048 arm_unwind_emit_set (asm_out_file
, pat
);
27052 /* Store multiple. */
27053 arm_unwind_emit_sequence (asm_out_file
, pat
);
27062 /* Output a reference from a function exception table to the type_info
27063 object X. The EABI specifies that the symbol should be relocated by
27064 an R_ARM_TARGET2 relocation. */
27067 arm_output_ttype (rtx x
)
27069 fputs ("\t.word\t", asm_out_file
);
27070 output_addr_const (asm_out_file
, x
);
27071 /* Use special relocations for symbol references. */
27072 if (!CONST_INT_P (x
))
27073 fputs ("(TARGET2)", asm_out_file
);
27074 fputc ('\n', asm_out_file
);
27079 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27082 arm_asm_emit_except_personality (rtx personality
)
27084 fputs ("\t.personality\t", asm_out_file
);
27085 output_addr_const (asm_out_file
, personality
);
27086 fputc ('\n', asm_out_file
);
27089 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27092 arm_asm_init_sections (void)
27094 exception_section
= get_unnamed_section (0, output_section_asm_op
,
27097 #endif /* ARM_UNWIND_INFO */
27099 /* Output unwind directives for the start/end of a function. */
27102 arm_output_fn_unwind (FILE * f
, bool prologue
)
27104 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27108 fputs ("\t.fnstart\n", f
);
27111 /* If this function will never be unwound, then mark it as such.
27112 The came condition is used in arm_unwind_emit to suppress
27113 the frame annotations. */
27114 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27115 && (TREE_NOTHROW (current_function_decl
)
27116 || crtl
->all_throwers_are_sibcalls
))
27117 fputs("\t.cantunwind\n", f
);
27119 fputs ("\t.fnend\n", f
);
27124 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27126 enum tls_reloc reloc
;
27129 val
= XVECEXP (x
, 0, 0);
27130 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27132 output_addr_const (fp
, val
);
27137 fputs ("(tlsgd)", fp
);
27140 fputs ("(tlsldm)", fp
);
27143 fputs ("(tlsldo)", fp
);
27146 fputs ("(gottpoff)", fp
);
27149 fputs ("(tpoff)", fp
);
27152 fputs ("(tlsdesc)", fp
);
27155 gcc_unreachable ();
27164 fputs (" + (. - ", fp
);
27165 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27166 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27167 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27168 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27178 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27181 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27183 gcc_assert (size
== 4);
27184 fputs ("\t.word\t", file
);
27185 output_addr_const (file
, x
);
27186 fputs ("(tlsldo)", file
);
27189 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27192 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27194 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27195 return arm_emit_tls_decoration (fp
, x
);
27196 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27199 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27201 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27202 assemble_name_raw (fp
, label
);
27206 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27208 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27212 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27216 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27218 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27222 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27226 else if (GET_CODE (x
) == CONST_VECTOR
)
27227 return arm_emit_vector_const (fp
, x
);
27232 /* Output assembly for a shift instruction.
27233 SET_FLAGS determines how the instruction modifies the condition codes.
27234 0 - Do not set condition codes.
27235 1 - Set condition codes.
27236 2 - Use smallest instruction. */
27238 arm_output_shift(rtx
* operands
, int set_flags
)
27241 static const char flag_chars
[3] = {'?', '.', '!'};
27246 c
= flag_chars
[set_flags
];
27247 if (TARGET_UNIFIED_ASM
)
27249 shift
= shift_op(operands
[3], &val
);
27253 operands
[2] = GEN_INT(val
);
27254 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27257 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27260 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
27261 output_asm_insn (pattern
, operands
);
27265 /* Output assembly for a WMMX immediate shift instruction. */
27267 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27269 int shift
= INTVAL (operands
[2]);
27271 machine_mode opmode
= GET_MODE (operands
[0]);
27273 gcc_assert (shift
>= 0);
27275 /* If the shift value in the register versions is > 63 (for D qualifier),
27276 31 (for W qualifier) or 15 (for H qualifier). */
27277 if (((opmode
== V4HImode
) && (shift
> 15))
27278 || ((opmode
== V2SImode
) && (shift
> 31))
27279 || ((opmode
== DImode
) && (shift
> 63)))
27283 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27284 output_asm_insn (templ
, operands
);
27285 if (opmode
== DImode
)
27287 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27288 output_asm_insn (templ
, operands
);
27293 /* The destination register will contain all zeros. */
27294 sprintf (templ
, "wzero\t%%0");
27295 output_asm_insn (templ
, operands
);
27300 if ((opmode
== DImode
) && (shift
> 32))
27302 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27303 output_asm_insn (templ
, operands
);
27304 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27305 output_asm_insn (templ
, operands
);
27309 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27310 output_asm_insn (templ
, operands
);
27315 /* Output assembly for a WMMX tinsr instruction. */
27317 arm_output_iwmmxt_tinsr (rtx
*operands
)
27319 int mask
= INTVAL (operands
[3]);
27322 int units
= mode_nunits
[GET_MODE (operands
[0])];
27323 gcc_assert ((mask
& (mask
- 1)) == 0);
27324 for (i
= 0; i
< units
; ++i
)
27326 if ((mask
& 0x01) == 1)
27332 gcc_assert (i
< units
);
27334 switch (GET_MODE (operands
[0]))
27337 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27340 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27343 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27346 gcc_unreachable ();
27349 output_asm_insn (templ
, operands
);
27354 /* Output a Thumb-1 casesi dispatch sequence. */
27356 thumb1_output_casesi (rtx
*operands
)
27358 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27360 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27362 switch (GET_MODE(diff_vec
))
27365 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27366 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27368 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27369 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27371 return "bl\t%___gnu_thumb1_case_si";
27373 gcc_unreachable ();
27377 /* Output a Thumb-2 casesi instruction. */
27379 thumb2_output_casesi (rtx
*operands
)
27381 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27383 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27385 output_asm_insn ("cmp\t%0, %1", operands
);
27386 output_asm_insn ("bhi\t%l3", operands
);
27387 switch (GET_MODE(diff_vec
))
27390 return "tbb\t[%|pc, %0]";
27392 return "tbh\t[%|pc, %0, lsl #1]";
27396 output_asm_insn ("adr\t%4, %l2", operands
);
27397 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27398 output_asm_insn ("add\t%4, %4, %5", operands
);
27403 output_asm_insn ("adr\t%4, %l2", operands
);
27404 return "ldr\t%|pc, [%4, %0, lsl #2]";
27407 gcc_unreachable ();
27411 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27412 per-core tuning structs. */
27414 arm_issue_rate (void)
27416 return current_tune
->issue_rate
;
27419 /* Return how many instructions should scheduler lookahead to choose the
27422 arm_first_cycle_multipass_dfa_lookahead (void)
27424 int issue_rate
= arm_issue_rate ();
27426 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27429 /* Enable modeling of L2 auto-prefetcher. */
27431 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27433 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27437 arm_mangle_type (const_tree type
)
27439 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27440 has to be managled as if it is in the "std" namespace. */
27441 if (TARGET_AAPCS_BASED
27442 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27443 return "St9__va_list";
27445 /* Half-precision float. */
27446 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27449 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27451 if (TYPE_NAME (type
) != NULL
)
27452 return arm_mangle_builtin_type (type
);
27454 /* Use the default mangling. */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11
};
27470 /* Adjust register allocation order when compiling for Thumb. */
27473 arm_order_regs_for_local_alloc (void)
27475 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27476 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27478 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27479 sizeof (thumb_core_reg_alloc_order
));
27482 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27485 arm_frame_pointer_required (void)
27487 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27490 /* If the function receives nonlocal gotos, it needs to save the frame
27491 pointer in the nonlocal_goto_save_area object. */
27492 if (cfun
->has_nonlocal_label
)
27495 /* The frame pointer is required for non-leaf APCS frames. */
27496 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !leaf_function_p ())
27499 /* If we are probing the stack in the prologue, we will have a faulting
27500 instruction prior to the stack adjustment and this requires a frame
27501 pointer if we want to catch the exception using the EABI unwinder. */
27502 if (!IS_INTERRUPT (arm_current_func_type ())
27503 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27504 && arm_except_unwind_info (&global_options
) == UI_TARGET
27505 && cfun
->can_throw_non_call_exceptions
)
27507 HOST_WIDE_INT size
= get_frame_size ();
27509 /* That's irrelevant if there is no stack adjustment. */
27513 /* That's relevant only if there is a stack probe. */
27514 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27516 /* We don't have the final size of the frame so adjust. */
27517 size
+= 32 * UNITS_PER_WORD
;
27518 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
27528 /* Only thumb1 can't support conditional execution, so return true if
27529 the target is not thumb1. */
27531 arm_have_conditional_execution (void)
27533 return !TARGET_THUMB1
;
27536 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27537 static HOST_WIDE_INT
27538 arm_vector_alignment (const_tree type
)
27540 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27542 if (TARGET_AAPCS_BASED
)
27543 align
= MIN (align
, 64);
27548 static unsigned int
27549 arm_autovectorize_vector_sizes (void)
27551 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27555 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27557 /* Vectors which aren't in packed structures will not be less aligned than
27558 the natural alignment of their element type, so this is safe. */
27559 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27562 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27566 arm_builtin_support_vector_misalignment (machine_mode mode
,
27567 const_tree type
, int misalignment
,
27570 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27572 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27577 /* If the misalignment is unknown, we should be able to handle the access
27578 so long as it is not to a member of a packed data structure. */
27579 if (misalignment
== -1)
27582 /* Return true if the misalignment is a multiple of the natural alignment
27583 of the vector's element type. This is probably always going to be
27584 true in practice, since we've already established that this isn't a
27586 return ((misalignment
% align
) == 0);
27589 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27594 arm_conditional_register_usage (void)
27598 if (TARGET_THUMB1
&& optimize_size
)
27600 /* When optimizing for size on Thumb-1, it's better not
27601 to use the HI regs, because of the overhead of
27603 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27604 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27607 /* The link register can be clobbered by any branch insn,
27608 but we have no way to track that at present, so mark
27609 it as unavailable. */
27611 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27613 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_VFP
)
27615 /* VFPv3 registers are disabled when earlier VFP
27616 versions are selected due to the definition of
27617 LAST_VFP_REGNUM. */
27618 for (regno
= FIRST_VFP_REGNUM
;
27619 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27621 fixed_regs
[regno
] = 0;
27622 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27623 || regno
>= FIRST_VFP_REGNUM
+ 32;
27627 if (TARGET_REALLY_IWMMXT
)
27629 regno
= FIRST_IWMMXT_GR_REGNUM
;
27630 /* The 2002/10/09 revision of the XScale ABI has wCG0
27631 and wCG1 as call-preserved registers. The 2002/11/21
27632 revision changed this so that all wCG registers are
27633 scratch registers. */
27634 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27635 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27636 fixed_regs
[regno
] = 0;
27637 /* The XScale ABI has wR0 - wR9 as scratch registers,
27638 the rest as call-preserved registers. */
27639 for (regno
= FIRST_IWMMXT_REGNUM
;
27640 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27642 fixed_regs
[regno
] = 0;
27643 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27647 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27649 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27650 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27652 else if (TARGET_APCS_STACK
)
27654 fixed_regs
[10] = 1;
27655 call_used_regs
[10] = 1;
27657 /* -mcaller-super-interworking reserves r11 for calls to
27658 _interwork_r11_call_via_rN(). Making the register global
27659 is an easy way of ensuring that it remains valid for all
27661 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27662 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27664 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27665 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27666 if (TARGET_CALLER_INTERWORKING
)
27667 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27669 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27673 arm_preferred_rename_class (reg_class_t rclass
)
27675 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27676 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27677 and code size can be reduced. */
27678 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27684 /* Compute the atrribute "length" of insn "*push_multi".
27685 So this function MUST be kept in sync with that insn pattern. */
27687 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
27689 int i
, regno
, hi_reg
;
27690 int num_saves
= XVECLEN (parallel_op
, 0);
27700 regno
= REGNO (first_op
);
27701 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27702 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
27704 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
27705 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27713 /* Compute the number of instructions emitted by output_move_double. */
27715 arm_count_output_move_double_insns (rtx
*operands
)
27719 /* output_move_double may modify the operands array, so call it
27720 here on a copy of the array. */
27721 ops
[0] = operands
[0];
27722 ops
[1] = operands
[1];
27723 output_move_double (ops
, false, &count
);
27728 vfp3_const_double_for_fract_bits (rtx operand
)
27730 REAL_VALUE_TYPE r0
;
27732 if (!CONST_DOUBLE_P (operand
))
27735 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
27736 if (exact_real_inverse (DFmode
, &r0
)
27737 && !REAL_VALUE_NEGATIVE (r0
))
27739 if (exact_real_truncate (DFmode
, &r0
))
27741 HOST_WIDE_INT value
= real_to_integer (&r0
);
27742 value
= value
& 0xffffffff;
27743 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
27744 return int_log2 (value
);
27750 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27751 log2 is in [1, 32], return that log2. Otherwise return -1.
27752 This is used in the patterns for vcvt.s32.f32 floating-point to
27753 fixed-point conversions. */
27756 vfp3_const_double_for_bits (rtx x
)
27758 const REAL_VALUE_TYPE
*r
;
27760 if (!CONST_DOUBLE_P (x
))
27763 r
= CONST_DOUBLE_REAL_VALUE (x
);
27765 if (REAL_VALUE_NEGATIVE (*r
)
27766 || REAL_VALUE_ISNAN (*r
)
27767 || REAL_VALUE_ISINF (*r
)
27768 || !real_isinteger (r
, SFmode
))
27771 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
27773 /* The exact_log2 above will have returned -1 if this is
27774 not an exact log2. */
27775 if (!IN_RANGE (hwint
, 1, 32))
27782 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27785 arm_pre_atomic_barrier (enum memmodel model
)
27787 if (need_atomic_barrier_p (model
, true))
27788 emit_insn (gen_memory_barrier ());
27792 arm_post_atomic_barrier (enum memmodel model
)
27794 if (need_atomic_barrier_p (model
, false))
27795 emit_insn (gen_memory_barrier ());
27798 /* Emit the load-exclusive and store-exclusive instructions.
27799 Use acquire and release versions if necessary. */
27802 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
27804 rtx (*gen
) (rtx
, rtx
);
27810 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
27811 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
27812 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
27813 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
27815 gcc_unreachable ();
27822 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
27823 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
27824 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
27825 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
27827 gcc_unreachable ();
27831 emit_insn (gen (rval
, mem
));
27835 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
27838 rtx (*gen
) (rtx
, rtx
, rtx
);
27844 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
27845 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
27846 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
27847 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
27849 gcc_unreachable ();
27856 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
27857 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
27858 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
27859 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
27861 gcc_unreachable ();
27865 emit_insn (gen (bval
, rval
, mem
));
27868 /* Mark the previous jump instruction as unlikely. */
27871 emit_unlikely_jump (rtx insn
)
27873 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
27875 insn
= emit_jump_insn (insn
);
27876 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
27879 /* Expand a compare and swap pattern. */
27882 arm_expand_compare_and_swap (rtx operands
[])
27884 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
27886 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
27888 bval
= operands
[0];
27889 rval
= operands
[1];
27891 oldval
= operands
[3];
27892 newval
= operands
[4];
27893 is_weak
= operands
[5];
27894 mod_s
= operands
[6];
27895 mod_f
= operands
[7];
27896 mode
= GET_MODE (mem
);
27898 /* Normally the succ memory model must be stronger than fail, but in the
27899 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27900 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27902 if (TARGET_HAVE_LDACQ
27903 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
27904 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
27905 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
27911 /* For narrow modes, we're going to perform the comparison in SImode,
27912 so do the zero-extension now. */
27913 rval
= gen_reg_rtx (SImode
);
27914 oldval
= convert_modes (SImode
, mode
, oldval
, true);
27918 /* Force the value into a register if needed. We waited until after
27919 the zero-extension above to do this properly. */
27920 if (!arm_add_operand (oldval
, SImode
))
27921 oldval
= force_reg (SImode
, oldval
);
27925 if (!cmpdi_operand (oldval
, mode
))
27926 oldval
= force_reg (mode
, oldval
);
27930 gcc_unreachable ();
27935 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
27936 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
27937 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
27938 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
27940 gcc_unreachable ();
27943 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
27945 if (mode
== QImode
|| mode
== HImode
)
27946 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
27948 /* In all cases, we arrange for success to be signaled by Z set.
27949 This arrangement allows for the boolean result to be used directly
27950 in a subsequent branch, post optimization. */
27951 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
27952 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
27953 emit_insn (gen_rtx_SET (bval
, x
));
27956 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27957 another memory store between the load-exclusive and store-exclusive can
27958 reset the monitor from Exclusive to Open state. This means we must wait
27959 until after reload to split the pattern, lest we get a register spill in
27960 the middle of the atomic sequence. */
27963 arm_split_compare_and_swap (rtx operands
[])
27965 rtx rval
, mem
, oldval
, newval
, scratch
;
27967 enum memmodel mod_s
, mod_f
;
27969 rtx_code_label
*label1
, *label2
;
27972 rval
= operands
[0];
27974 oldval
= operands
[2];
27975 newval
= operands
[3];
27976 is_weak
= (operands
[4] != const0_rtx
);
27977 mod_s
= memmodel_from_int (INTVAL (operands
[5]));
27978 mod_f
= memmodel_from_int (INTVAL (operands
[6]));
27979 scratch
= operands
[7];
27980 mode
= GET_MODE (mem
);
27982 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
27984 bool use_acquire
= TARGET_HAVE_LDACQ
27985 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
27986 || is_mm_release (mod_s
));
27988 bool use_release
= TARGET_HAVE_LDACQ
27989 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
27990 || is_mm_acquire (mod_s
));
27992 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27993 a full barrier is emitted after the store-release. */
27995 use_acquire
= false;
27997 /* Checks whether a barrier is needed and emits one accordingly. */
27998 if (!(use_acquire
|| use_release
))
27999 arm_pre_atomic_barrier (mod_s
);
28004 label1
= gen_label_rtx ();
28005 emit_label (label1
);
28007 label2
= gen_label_rtx ();
28009 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28011 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, scratch
);
28012 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28013 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28014 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28015 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28017 arm_emit_store_exclusive (mode
, scratch
, mem
, newval
, use_release
);
28019 /* Weak or strong, we want EQ to be true for success, so that we
28020 match the flags that we got from the compare above. */
28021 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28022 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
28023 emit_insn (gen_rtx_SET (cond
, x
));
28027 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28028 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28029 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
28030 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28033 if (!is_mm_relaxed (mod_f
))
28034 emit_label (label2
);
28036 /* Checks whether a barrier is needed and emits one accordingly. */
28038 || !(use_acquire
|| use_release
))
28039 arm_post_atomic_barrier (mod_s
);
28041 if (is_mm_relaxed (mod_f
))
28042 emit_label (label2
);
28046 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28047 rtx value
, rtx model_rtx
, rtx cond
)
28049 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28050 machine_mode mode
= GET_MODE (mem
);
28051 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28052 rtx_code_label
*label
;
28055 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28057 bool use_acquire
= TARGET_HAVE_LDACQ
28058 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28059 || is_mm_release (model
));
28061 bool use_release
= TARGET_HAVE_LDACQ
28062 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28063 || is_mm_acquire (model
));
28065 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28066 a full barrier is emitted after the store-release. */
28068 use_acquire
= false;
28070 /* Checks whether a barrier is needed and emits one accordingly. */
28071 if (!(use_acquire
|| use_release
))
28072 arm_pre_atomic_barrier (model
);
28074 label
= gen_label_rtx ();
28075 emit_label (label
);
28078 new_out
= gen_lowpart (wmode
, new_out
);
28080 old_out
= gen_lowpart (wmode
, old_out
);
28083 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28085 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28094 x
= gen_rtx_AND (wmode
, old_out
, value
);
28095 emit_insn (gen_rtx_SET (new_out
, x
));
28096 x
= gen_rtx_NOT (wmode
, new_out
);
28097 emit_insn (gen_rtx_SET (new_out
, x
));
28101 if (CONST_INT_P (value
))
28103 value
= GEN_INT (-INTVAL (value
));
28109 if (mode
== DImode
)
28111 /* DImode plus/minus need to clobber flags. */
28112 /* The adddi3 and subdi3 patterns are incorrectly written so that
28113 they require matching operands, even when we could easily support
28114 three operands. Thankfully, this can be fixed up post-splitting,
28115 as the individual add+adc patterns do accept three operands and
28116 post-reload cprop can make these moves go away. */
28117 emit_move_insn (new_out
, old_out
);
28119 x
= gen_adddi3 (new_out
, new_out
, value
);
28121 x
= gen_subdi3 (new_out
, new_out
, value
);
28128 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28129 emit_insn (gen_rtx_SET (new_out
, x
));
28133 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28136 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28137 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28139 /* Checks whether a barrier is needed and emits one accordingly. */
28141 || !(use_acquire
|| use_release
))
28142 arm_post_atomic_barrier (model
);
28145 #define MAX_VECT_LEN 16
28147 struct expand_vec_perm_d
28149 rtx target
, op0
, op1
;
28150 unsigned char perm
[MAX_VECT_LEN
];
28151 machine_mode vmode
;
28152 unsigned char nelt
;
28157 /* Generate a variable permutation. */
28160 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28162 machine_mode vmode
= GET_MODE (target
);
28163 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28165 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28166 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28167 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28168 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28169 gcc_checking_assert (TARGET_NEON
);
28173 if (vmode
== V8QImode
)
28174 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28176 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28182 if (vmode
== V8QImode
)
28184 pair
= gen_reg_rtx (V16QImode
);
28185 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28186 pair
= gen_lowpart (TImode
, pair
);
28187 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28191 pair
= gen_reg_rtx (OImode
);
28192 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28193 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28199 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28201 machine_mode vmode
= GET_MODE (target
);
28202 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
28203 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28204 rtx rmask
[MAX_VECT_LEN
], mask
;
28206 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28207 numbering of elements for big-endian, we must reverse the order. */
28208 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28210 /* The VTBL instruction does not use a modulo index, so we must take care
28211 of that ourselves. */
28212 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28213 for (i
= 0; i
< nelt
; ++i
)
28215 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
28216 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28218 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28221 /* Generate or test for an insn that supports a constant permutation. */
28223 /* Recognize patterns for the VUZP insns. */
28226 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28228 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28229 rtx out0
, out1
, in0
, in1
;
28230 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28232 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28235 /* Note that these are little-endian tests. Adjust for big-endian later. */
28236 if (d
->perm
[0] == 0)
28238 else if (d
->perm
[0] == 1)
28242 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28244 for (i
= 0; i
< nelt
; i
++)
28246 unsigned elt
= (i
* 2 + odd
) & mask
;
28247 if (d
->perm
[i
] != elt
)
28257 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28258 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28259 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28260 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28261 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28262 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28263 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28264 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28266 gcc_unreachable ();
28271 if (BYTES_BIG_ENDIAN
)
28273 std::swap (in0
, in1
);
28278 out1
= gen_reg_rtx (d
->vmode
);
28280 std::swap (out0
, out1
);
28282 emit_insn (gen (out0
, in0
, in1
, out1
));
28286 /* Recognize patterns for the VZIP insns. */
28289 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28291 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
28292 rtx out0
, out1
, in0
, in1
;
28293 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28295 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28298 /* Note that these are little-endian tests. Adjust for big-endian later. */
28300 if (d
->perm
[0] == high
)
28302 else if (d
->perm
[0] == 0)
28306 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28308 for (i
= 0; i
< nelt
/ 2; i
++)
28310 unsigned elt
= (i
+ high
) & mask
;
28311 if (d
->perm
[i
* 2] != elt
)
28313 elt
= (elt
+ nelt
) & mask
;
28314 if (d
->perm
[i
* 2 + 1] != elt
)
28324 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28325 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28326 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28327 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28328 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28329 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28330 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28331 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28333 gcc_unreachable ();
28338 if (BYTES_BIG_ENDIAN
)
28340 std::swap (in0
, in1
);
28345 out1
= gen_reg_rtx (d
->vmode
);
28347 std::swap (out0
, out1
);
28349 emit_insn (gen (out0
, in0
, in1
, out1
));
28353 /* Recognize patterns for the VREV insns. */
28356 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
28358 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
28359 rtx (*gen
)(rtx
, rtx
);
28361 if (!d
->one_vector_p
)
28370 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28371 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28379 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28380 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28381 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28382 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28390 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
28391 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
28392 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
28393 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
28394 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
28395 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
28396 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
28397 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
28406 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
28407 for (j
= 0; j
<= diff
; j
+= 1)
28409 /* This is guaranteed to be true as the value of diff
28410 is 7, 3, 1 and we should have enough elements in the
28411 queue to generate this. Getting a vector mask with a
28412 value of diff other than these values implies that
28413 something is wrong by the time we get here. */
28414 gcc_assert (i
+ j
< nelt
);
28415 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
28423 emit_insn (gen (d
->target
, d
->op0
));
28427 /* Recognize patterns for the VTRN insns. */
28430 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
28432 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28433 rtx out0
, out1
, in0
, in1
;
28434 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28436 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28439 /* Note that these are little-endian tests. Adjust for big-endian later. */
28440 if (d
->perm
[0] == 0)
28442 else if (d
->perm
[0] == 1)
28446 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28448 for (i
= 0; i
< nelt
; i
+= 2)
28450 if (d
->perm
[i
] != i
+ odd
)
28452 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28462 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28463 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28464 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28465 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28466 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
28467 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
28468 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
28469 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
28471 gcc_unreachable ();
28476 if (BYTES_BIG_ENDIAN
)
28478 std::swap (in0
, in1
);
28483 out1
= gen_reg_rtx (d
->vmode
);
28485 std::swap (out0
, out1
);
28487 emit_insn (gen (out0
, in0
, in1
, out1
));
28491 /* Recognize patterns for the VEXT insns. */
28494 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
28496 unsigned int i
, nelt
= d
->nelt
;
28497 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
28500 unsigned int location
;
28502 unsigned int next
= d
->perm
[0] + 1;
28504 /* TODO: Handle GCC's numbering of elements for big-endian. */
28505 if (BYTES_BIG_ENDIAN
)
28508 /* Check if the extracted indexes are increasing by one. */
28509 for (i
= 1; i
< nelt
; next
++, i
++)
28511 /* If we hit the most significant element of the 2nd vector in
28512 the previous iteration, no need to test further. */
28513 if (next
== 2 * nelt
)
28516 /* If we are operating on only one vector: it could be a
28517 rotation. If there are only two elements of size < 64, let
28518 arm_evpc_neon_vrev catch it. */
28519 if (d
->one_vector_p
&& (next
== nelt
))
28521 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
28527 if (d
->perm
[i
] != next
)
28531 location
= d
->perm
[0];
28535 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
28536 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
28537 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
28538 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
28539 case V2SImode
: gen
= gen_neon_vextv2si
; break;
28540 case V4SImode
: gen
= gen_neon_vextv4si
; break;
28541 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
28542 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
28543 case V2DImode
: gen
= gen_neon_vextv2di
; break;
28552 offset
= GEN_INT (location
);
28553 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
28557 /* The NEON VTBL instruction is a fully variable permuation that's even
28558 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28559 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28560 can do slightly better by expanding this as a constant where we don't
28561 have to apply a mask. */
28564 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
28566 rtx rperm
[MAX_VECT_LEN
], sel
;
28567 machine_mode vmode
= d
->vmode
;
28568 unsigned int i
, nelt
= d
->nelt
;
28570 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28571 numbering of elements for big-endian, we must reverse the order. */
28572 if (BYTES_BIG_ENDIAN
)
28578 /* Generic code will try constant permutation twice. Once with the
28579 original mode and again with the elements lowered to QImode.
28580 So wait and don't do the selector expansion ourselves. */
28581 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
28584 for (i
= 0; i
< nelt
; ++i
)
28585 rperm
[i
] = GEN_INT (d
->perm
[i
]);
28586 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
28587 sel
= force_reg (vmode
, sel
);
28589 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
28594 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
28596 /* Check if the input mask matches vext before reordering the
28599 if (arm_evpc_neon_vext (d
))
28602 /* The pattern matching functions above are written to look for a small
28603 number to begin the sequence (0, 1, N/2). If we begin with an index
28604 from the second operand, we can swap the operands. */
28605 if (d
->perm
[0] >= d
->nelt
)
28607 unsigned i
, nelt
= d
->nelt
;
28609 for (i
= 0; i
< nelt
; ++i
)
28610 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
28612 std::swap (d
->op0
, d
->op1
);
28617 if (arm_evpc_neon_vuzp (d
))
28619 if (arm_evpc_neon_vzip (d
))
28621 if (arm_evpc_neon_vrev (d
))
28623 if (arm_evpc_neon_vtrn (d
))
28625 return arm_evpc_neon_vtbl (d
);
28630 /* Expand a vec_perm_const pattern. */
28633 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28635 struct expand_vec_perm_d d
;
28636 int i
, nelt
, which
;
28642 d
.vmode
= GET_MODE (target
);
28643 gcc_assert (VECTOR_MODE_P (d
.vmode
));
28644 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
28645 d
.testing_p
= false;
28647 for (i
= which
= 0; i
< nelt
; ++i
)
28649 rtx e
= XVECEXP (sel
, 0, i
);
28650 int ei
= INTVAL (e
) & (2 * nelt
- 1);
28651 which
|= (ei
< nelt
? 1 : 2);
28661 d
.one_vector_p
= false;
28662 if (!rtx_equal_p (op0
, op1
))
28665 /* The elements of PERM do not suggest that only the first operand
28666 is used, but both operands are identical. Allow easier matching
28667 of the permutation by folding the permutation into the single
28671 for (i
= 0; i
< nelt
; ++i
)
28672 d
.perm
[i
] &= nelt
- 1;
28674 d
.one_vector_p
= true;
28679 d
.one_vector_p
= true;
28683 return arm_expand_vec_perm_const_1 (&d
);
28686 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28689 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
28690 const unsigned char *sel
)
28692 struct expand_vec_perm_d d
;
28693 unsigned int i
, nelt
, which
;
28697 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
28698 d
.testing_p
= true;
28699 memcpy (d
.perm
, sel
, nelt
);
28701 /* Categorize the set of elements in the selector. */
28702 for (i
= which
= 0; i
< nelt
; ++i
)
28704 unsigned char e
= d
.perm
[i
];
28705 gcc_assert (e
< 2 * nelt
);
28706 which
|= (e
< nelt
? 1 : 2);
28709 /* For all elements from second vector, fold the elements to first. */
28711 for (i
= 0; i
< nelt
; ++i
)
28714 /* Check whether the mask can be applied to the vector type. */
28715 d
.one_vector_p
= (which
!= 3);
28717 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
28718 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
28719 if (!d
.one_vector_p
)
28720 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
28723 ret
= arm_expand_vec_perm_const_1 (&d
);
28730 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
28732 /* If we are soft float and we do not have ldrd
28733 then all auto increment forms are ok. */
28734 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
28739 /* Post increment and Pre Decrement are supported for all
28740 instruction forms except for vector forms. */
28743 if (VECTOR_MODE_P (mode
))
28745 if (code
!= ARM_PRE_DEC
)
28755 /* Without LDRD and mode size greater than
28756 word size, there is no point in auto-incrementing
28757 because ldm and stm will not have these forms. */
28758 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
28761 /* Vector and floating point modes do not support
28762 these auto increment forms. */
28763 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
28776 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28777 on ARM, since we know that shifts by negative amounts are no-ops.
28778 Additionally, the default expansion code is not available or suitable
28779 for post-reload insn splits (this can occur when the register allocator
28780 chooses not to do a shift in NEON).
28782 This function is used in both initial expand and post-reload splits, and
28783 handles all kinds of 64-bit shifts.
28785 Input requirements:
28786 - It is safe for the input and output to be the same register, but
28787 early-clobber rules apply for the shift amount and scratch registers.
28788 - Shift by register requires both scratch registers. In all other cases
28789 the scratch registers may be NULL.
28790 - Ashiftrt by a register also clobbers the CC register. */
28792 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
28793 rtx amount
, rtx scratch1
, rtx scratch2
)
28795 rtx out_high
= gen_highpart (SImode
, out
);
28796 rtx out_low
= gen_lowpart (SImode
, out
);
28797 rtx in_high
= gen_highpart (SImode
, in
);
28798 rtx in_low
= gen_lowpart (SImode
, in
);
28801 in = the register pair containing the input value.
28802 out = the destination register pair.
28803 up = the high- or low-part of each pair.
28804 down = the opposite part to "up".
28805 In a shift, we can consider bits to shift from "up"-stream to
28806 "down"-stream, so in a left-shift "up" is the low-part and "down"
28807 is the high-part of each register pair. */
28809 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
28810 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
28811 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
28812 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
28814 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
28816 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
28817 && GET_MODE (out
) == DImode
);
28819 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
28820 && GET_MODE (in
) == DImode
);
28822 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
28823 && GET_MODE (amount
) == SImode
)
28824 || CONST_INT_P (amount
)));
28825 gcc_assert (scratch1
== NULL
28826 || (GET_CODE (scratch1
) == SCRATCH
)
28827 || (GET_MODE (scratch1
) == SImode
28828 && REG_P (scratch1
)));
28829 gcc_assert (scratch2
== NULL
28830 || (GET_CODE (scratch2
) == SCRATCH
)
28831 || (GET_MODE (scratch2
) == SImode
28832 && REG_P (scratch2
)));
28833 gcc_assert (!REG_P (out
) || !REG_P (amount
)
28834 || !HARD_REGISTER_P (out
)
28835 || (REGNO (out
) != REGNO (amount
)
28836 && REGNO (out
) + 1 != REGNO (amount
)));
28838 /* Macros to make following code more readable. */
28839 #define SUB_32(DEST,SRC) \
28840 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28841 #define RSB_32(DEST,SRC) \
28842 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28843 #define SUB_S_32(DEST,SRC) \
28844 gen_addsi3_compare0 ((DEST), (SRC), \
28846 #define SET(DEST,SRC) \
28847 gen_rtx_SET ((DEST), (SRC))
28848 #define SHIFT(CODE,SRC,AMOUNT) \
28849 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28850 #define LSHIFT(CODE,SRC,AMOUNT) \
28851 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28852 SImode, (SRC), (AMOUNT))
28853 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28854 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28855 SImode, (SRC), (AMOUNT))
28857 gen_rtx_IOR (SImode, (A), (B))
28858 #define BRANCH(COND,LABEL) \
28859 gen_arm_cond_branch ((LABEL), \
28860 gen_rtx_ ## COND (CCmode, cc_reg, \
28864 /* Shifts by register and shifts by constant are handled separately. */
28865 if (CONST_INT_P (amount
))
28867 /* We have a shift-by-constant. */
28869 /* First, handle out-of-range shift amounts.
28870 In both cases we try to match the result an ARM instruction in a
28871 shift-by-register would give. This helps reduce execution
28872 differences between optimization levels, but it won't stop other
28873 parts of the compiler doing different things. This is "undefined
28874 behaviour, in any case. */
28875 if (INTVAL (amount
) <= 0)
28876 emit_insn (gen_movdi (out
, in
));
28877 else if (INTVAL (amount
) >= 64)
28879 if (code
== ASHIFTRT
)
28881 rtx const31_rtx
= GEN_INT (31);
28882 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
28883 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
28886 emit_insn (gen_movdi (out
, const0_rtx
));
28889 /* Now handle valid shifts. */
28890 else if (INTVAL (amount
) < 32)
28892 /* Shifts by a constant less than 32. */
28893 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
28895 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28896 emit_insn (SET (out_down
,
28897 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
28899 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
28903 /* Shifts by a constant greater than 31. */
28904 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
28906 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
28907 if (code
== ASHIFTRT
)
28908 emit_insn (gen_ashrsi3 (out_up
, in_up
,
28911 emit_insn (SET (out_up
, const0_rtx
));
28916 /* We have a shift-by-register. */
28917 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
28919 /* This alternative requires the scratch registers. */
28920 gcc_assert (scratch1
&& REG_P (scratch1
));
28921 gcc_assert (scratch2
&& REG_P (scratch2
));
28923 /* We will need the values "amount-32" and "32-amount" later.
28924 Swapping them around now allows the later code to be more general. */
28928 emit_insn (SUB_32 (scratch1
, amount
));
28929 emit_insn (RSB_32 (scratch2
, amount
));
28932 emit_insn (RSB_32 (scratch1
, amount
));
28933 /* Also set CC = amount > 32. */
28934 emit_insn (SUB_S_32 (scratch2
, amount
));
28937 emit_insn (RSB_32 (scratch1
, amount
));
28938 emit_insn (SUB_32 (scratch2
, amount
));
28941 gcc_unreachable ();
28944 /* Emit code like this:
28947 out_down = in_down << amount;
28948 out_down = (in_up << (amount - 32)) | out_down;
28949 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28950 out_up = in_up << amount;
28953 out_down = in_down >> amount;
28954 out_down = (in_up << (32 - amount)) | out_down;
28956 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28957 out_up = in_up << amount;
28960 out_down = in_down >> amount;
28961 out_down = (in_up << (32 - amount)) | out_down;
28963 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28964 out_up = in_up << amount;
28966 The ARM and Thumb2 variants are the same but implemented slightly
28967 differently. If this were only called during expand we could just
28968 use the Thumb2 case and let combine do the right thing, but this
28969 can also be called from post-reload splitters. */
28971 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28973 if (!TARGET_THUMB2
)
28975 /* Emit code for ARM mode. */
28976 emit_insn (SET (out_down
,
28977 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
28978 if (code
== ASHIFTRT
)
28980 rtx_code_label
*done_label
= gen_label_rtx ();
28981 emit_jump_insn (BRANCH (LT
, done_label
));
28982 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
28984 emit_label (done_label
);
28987 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
28992 /* Emit code for Thumb2 mode.
28993 Thumb2 can't do shift and or in one insn. */
28994 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
28995 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
28997 if (code
== ASHIFTRT
)
28999 rtx_code_label
*done_label
= gen_label_rtx ();
29000 emit_jump_insn (BRANCH (LT
, done_label
));
29001 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29002 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29003 emit_label (done_label
);
29007 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29008 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29012 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29026 /* Returns true if the pattern is a valid symbolic address, which is either a
29027 symbol_ref or (symbol_ref + addend).
29029 According to the ARM ELF ABI, the initial addend of REL-type relocations
29030 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29031 literal field of the instruction as a 16-bit signed value in the range
29032 -32768 <= A < 32768. */
29035 arm_valid_symbolic_address_p (rtx addr
)
29037 rtx xop0
, xop1
= NULL_RTX
;
29040 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29043 /* (const (plus: symbol_ref const_int)) */
29044 if (GET_CODE (addr
) == CONST
)
29045 tmp
= XEXP (addr
, 0);
29047 if (GET_CODE (tmp
) == PLUS
)
29049 xop0
= XEXP (tmp
, 0);
29050 xop1
= XEXP (tmp
, 1);
29052 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29053 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29059 /* Returns true if a valid comparison operation and makes
29060 the operands in a form that is valid. */
29062 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29064 enum rtx_code code
= GET_CODE (*comparison
);
29066 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29067 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29069 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29071 if (code
== UNEQ
|| code
== LTGT
)
29074 code_int
= (int)code
;
29075 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29076 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29081 if (!arm_add_operand (*op1
, mode
))
29082 *op1
= force_reg (mode
, *op1
);
29083 if (!arm_add_operand (*op2
, mode
))
29084 *op2
= force_reg (mode
, *op2
);
29088 if (!cmpdi_operand (*op1
, mode
))
29089 *op1
= force_reg (mode
, *op1
);
29090 if (!cmpdi_operand (*op2
, mode
))
29091 *op2
= force_reg (mode
, *op2
);
29096 if (!arm_float_compare_operand (*op1
, mode
))
29097 *op1
= force_reg (mode
, *op1
);
29098 if (!arm_float_compare_operand (*op2
, mode
))
29099 *op2
= force_reg (mode
, *op2
);
29109 /* Maximum number of instructions to set block of memory. */
29111 arm_block_set_max_insns (void)
29113 if (optimize_function_for_size_p (cfun
))
29116 return current_tune
->max_insns_inline_memset
;
29119 /* Return TRUE if it's profitable to set block of memory for
29120 non-vectorized case. VAL is the value to set the memory
29121 with. LENGTH is the number of bytes to set. ALIGN is the
29122 alignment of the destination memory in bytes. UNALIGNED_P
29123 is TRUE if we can only set the memory with instructions
29124 meeting alignment requirements. USE_STRD_P is TRUE if we
29125 can use strd to set the memory. */
29127 arm_block_set_non_vect_profit_p (rtx val
,
29128 unsigned HOST_WIDE_INT length
,
29129 unsigned HOST_WIDE_INT align
,
29130 bool unaligned_p
, bool use_strd_p
)
29133 /* For leftovers in bytes of 0-7, we can set the memory block using
29134 strb/strh/str with minimum instruction number. */
29135 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29139 num
= arm_const_inline_cost (SET
, val
);
29140 num
+= length
/ align
+ length
% align
;
29142 else if (use_strd_p
)
29144 num
= arm_const_double_inline_cost (val
);
29145 num
+= (length
>> 3) + leftover
[length
& 7];
29149 num
= arm_const_inline_cost (SET
, val
);
29150 num
+= (length
>> 2) + leftover
[length
& 3];
29153 /* We may be able to combine last pair STRH/STRB into a single STR
29154 by shifting one byte back. */
29155 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29158 return (num
<= arm_block_set_max_insns ());
29161 /* Return TRUE if it's profitable to set block of memory for
29162 vectorized case. LENGTH is the number of bytes to set.
29163 ALIGN is the alignment of destination memory in bytes.
29164 MODE is the vector mode used to set the memory. */
29166 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
29167 unsigned HOST_WIDE_INT align
,
29171 bool unaligned_p
= ((align
& 3) != 0);
29172 unsigned int nelt
= GET_MODE_NUNITS (mode
);
29174 /* Instruction loading constant value. */
29176 /* Instructions storing the memory. */
29177 num
+= (length
+ nelt
- 1) / nelt
;
29178 /* Instructions adjusting the address expression. Only need to
29179 adjust address expression if it's 4 bytes aligned and bytes
29180 leftover can only be stored by mis-aligned store instruction. */
29181 if (!unaligned_p
&& (length
& 3) != 0)
29184 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29185 if (!unaligned_p
&& mode
== V16QImode
)
29188 return (num
<= arm_block_set_max_insns ());
29191 /* Set a block of memory using vectorization instructions for the
29192 unaligned case. We fill the first LENGTH bytes of the memory
29193 area starting from DSTBASE with byte constant VALUE. ALIGN is
29194 the alignment requirement of memory. Return TRUE if succeeded. */
29196 arm_block_set_unaligned_vect (rtx dstbase
,
29197 unsigned HOST_WIDE_INT length
,
29198 unsigned HOST_WIDE_INT value
,
29199 unsigned HOST_WIDE_INT align
)
29201 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
29203 rtx val_elt
, val_vec
, reg
;
29204 rtx rval
[MAX_VECT_LEN
];
29205 rtx (*gen_func
) (rtx
, rtx
);
29207 unsigned HOST_WIDE_INT v
= value
;
29209 gcc_assert ((align
& 0x3) != 0);
29210 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29211 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29212 if (length
>= nelt_v16
)
29215 gen_func
= gen_movmisalignv16qi
;
29220 gen_func
= gen_movmisalignv8qi
;
29222 nelt_mode
= GET_MODE_NUNITS (mode
);
29223 gcc_assert (length
>= nelt_mode
);
29224 /* Skip if it isn't profitable. */
29225 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29228 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29229 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
29231 v
= sext_hwi (v
, BITS_PER_WORD
);
29232 val_elt
= GEN_INT (v
);
29233 for (j
= 0; j
< nelt_mode
; j
++)
29236 reg
= gen_reg_rtx (mode
);
29237 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29238 /* Emit instruction loading the constant value. */
29239 emit_move_insn (reg
, val_vec
);
29241 /* Handle nelt_mode bytes in a vector. */
29242 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29244 emit_insn ((*gen_func
) (mem
, reg
));
29245 if (i
+ 2 * nelt_mode
<= length
)
29246 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
29249 /* If there are not less than nelt_v8 bytes leftover, we must be in
29251 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
29253 /* Handle (8, 16) bytes leftover. */
29254 if (i
+ nelt_v8
< length
)
29256 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
29257 /* We are shifting bytes back, set the alignment accordingly. */
29258 if ((length
& 1) != 0 && align
>= 2)
29259 set_mem_align (mem
, BITS_PER_UNIT
);
29261 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29263 /* Handle (0, 8] bytes leftover. */
29264 else if (i
< length
&& i
+ nelt_v8
>= length
)
29266 if (mode
== V16QImode
)
29268 reg
= gen_lowpart (V8QImode
, reg
);
29269 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, 0);
29271 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
29272 + (nelt_mode
- nelt_v8
))));
29273 /* We are shifting bytes back, set the alignment accordingly. */
29274 if ((length
& 1) != 0 && align
>= 2)
29275 set_mem_align (mem
, BITS_PER_UNIT
);
29277 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29283 /* Set a block of memory using vectorization instructions for the
29284 aligned case. We fill the first LENGTH bytes of the memory area
29285 starting from DSTBASE with byte constant VALUE. ALIGN is the
29286 alignment requirement of memory. Return TRUE if succeeded. */
29288 arm_block_set_aligned_vect (rtx dstbase
,
29289 unsigned HOST_WIDE_INT length
,
29290 unsigned HOST_WIDE_INT value
,
29291 unsigned HOST_WIDE_INT align
)
29293 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
29294 rtx dst
, addr
, mem
;
29295 rtx val_elt
, val_vec
, reg
;
29296 rtx rval
[MAX_VECT_LEN
];
29298 unsigned HOST_WIDE_INT v
= value
;
29300 gcc_assert ((align
& 0x3) == 0);
29301 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29302 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29303 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
29308 nelt_mode
= GET_MODE_NUNITS (mode
);
29309 gcc_assert (length
>= nelt_mode
);
29310 /* Skip if it isn't profitable. */
29311 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29314 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29316 v
= sext_hwi (v
, BITS_PER_WORD
);
29317 val_elt
= GEN_INT (v
);
29318 for (j
= 0; j
< nelt_mode
; j
++)
29321 reg
= gen_reg_rtx (mode
);
29322 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29323 /* Emit instruction loading the constant value. */
29324 emit_move_insn (reg
, val_vec
);
29327 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29328 if (mode
== V16QImode
)
29330 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
29331 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29333 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29334 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
29336 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29337 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
29338 /* We are shifting bytes back, set the alignment accordingly. */
29339 if ((length
& 0x3) == 0)
29340 set_mem_align (mem
, BITS_PER_UNIT
* 4);
29341 else if ((length
& 0x1) == 0)
29342 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29344 set_mem_align (mem
, BITS_PER_UNIT
);
29346 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29349 /* Fall through for bytes leftover. */
29351 nelt_mode
= GET_MODE_NUNITS (mode
);
29352 reg
= gen_lowpart (V8QImode
, reg
);
29355 /* Handle 8 bytes in a vector. */
29356 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29358 addr
= plus_constant (Pmode
, dst
, i
);
29359 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
29360 emit_move_insn (mem
, reg
);
29363 /* Handle single word leftover by shifting 4 bytes back. We can
29364 use aligned access for this case. */
29365 if (i
+ UNITS_PER_WORD
== length
)
29367 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
29368 mem
= adjust_automodify_address (dstbase
, mode
,
29369 addr
, i
- UNITS_PER_WORD
);
29370 /* We are shifting 4 bytes back, set the alignment accordingly. */
29371 if (align
> UNITS_PER_WORD
)
29372 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
29374 emit_move_insn (mem
, reg
);
29376 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29377 We have to use unaligned access for this case. */
29378 else if (i
< length
)
29380 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29381 mem
= adjust_automodify_address (dstbase
, mode
, dst
, 0);
29382 /* We are shifting bytes back, set the alignment accordingly. */
29383 if ((length
& 1) == 0)
29384 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29386 set_mem_align (mem
, BITS_PER_UNIT
);
29388 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29394 /* Set a block of memory using plain strh/strb instructions, only
29395 using instructions allowed by ALIGN on processor. We fill the
29396 first LENGTH bytes of the memory area starting from DSTBASE
29397 with byte constant VALUE. ALIGN is the alignment requirement
29400 arm_block_set_unaligned_non_vect (rtx dstbase
,
29401 unsigned HOST_WIDE_INT length
,
29402 unsigned HOST_WIDE_INT value
,
29403 unsigned HOST_WIDE_INT align
)
29406 rtx dst
, addr
, mem
;
29407 rtx val_exp
, val_reg
, reg
;
29409 HOST_WIDE_INT v
= value
;
29411 gcc_assert (align
== 1 || align
== 2);
29414 v
|= (value
<< BITS_PER_UNIT
);
29416 v
= sext_hwi (v
, BITS_PER_WORD
);
29417 val_exp
= GEN_INT (v
);
29418 /* Skip if it isn't profitable. */
29419 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29420 align
, true, false))
29423 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29424 mode
= (align
== 2 ? HImode
: QImode
);
29425 val_reg
= force_reg (SImode
, val_exp
);
29426 reg
= gen_lowpart (mode
, val_reg
);
29428 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
29430 addr
= plus_constant (Pmode
, dst
, i
);
29431 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
29432 emit_move_insn (mem
, reg
);
29435 /* Handle single byte leftover. */
29436 if (i
+ 1 == length
)
29438 reg
= gen_lowpart (QImode
, val_reg
);
29439 addr
= plus_constant (Pmode
, dst
, i
);
29440 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29441 emit_move_insn (mem
, reg
);
29445 gcc_assert (i
== length
);
29449 /* Set a block of memory using plain strd/str/strh/strb instructions,
29450 to permit unaligned copies on processors which support unaligned
29451 semantics for those instructions. We fill the first LENGTH bytes
29452 of the memory area starting from DSTBASE with byte constant VALUE.
29453 ALIGN is the alignment requirement of memory. */
29455 arm_block_set_aligned_non_vect (rtx dstbase
,
29456 unsigned HOST_WIDE_INT length
,
29457 unsigned HOST_WIDE_INT value
,
29458 unsigned HOST_WIDE_INT align
)
29461 rtx dst
, addr
, mem
;
29462 rtx val_exp
, val_reg
, reg
;
29463 unsigned HOST_WIDE_INT v
;
29466 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
29467 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
29469 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
29470 if (length
< UNITS_PER_WORD
)
29471 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
29474 v
|= (v
<< BITS_PER_WORD
);
29476 v
= sext_hwi (v
, BITS_PER_WORD
);
29478 val_exp
= GEN_INT (v
);
29479 /* Skip if it isn't profitable. */
29480 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29481 align
, false, use_strd_p
))
29486 /* Try without strd. */
29487 v
= (v
>> BITS_PER_WORD
);
29488 v
= sext_hwi (v
, BITS_PER_WORD
);
29489 val_exp
= GEN_INT (v
);
29490 use_strd_p
= false;
29491 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29492 align
, false, use_strd_p
))
29497 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29498 /* Handle double words using strd if possible. */
29501 val_reg
= force_reg (DImode
, val_exp
);
29503 for (; (i
+ 8 <= length
); i
+= 8)
29505 addr
= plus_constant (Pmode
, dst
, i
);
29506 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
29507 emit_move_insn (mem
, reg
);
29511 val_reg
= force_reg (SImode
, val_exp
);
29513 /* Handle words. */
29514 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
29515 for (; (i
+ 4 <= length
); i
+= 4)
29517 addr
= plus_constant (Pmode
, dst
, i
);
29518 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
29519 if ((align
& 3) == 0)
29520 emit_move_insn (mem
, reg
);
29522 emit_insn (gen_unaligned_storesi (mem
, reg
));
29525 /* Merge last pair of STRH and STRB into a STR if possible. */
29526 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
29528 addr
= plus_constant (Pmode
, dst
, i
- 1);
29529 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
29530 /* We are shifting one byte back, set the alignment accordingly. */
29531 if ((align
& 1) == 0)
29532 set_mem_align (mem
, BITS_PER_UNIT
);
29534 /* Most likely this is an unaligned access, and we can't tell at
29535 compilation time. */
29536 emit_insn (gen_unaligned_storesi (mem
, reg
));
29540 /* Handle half word leftover. */
29541 if (i
+ 2 <= length
)
29543 reg
= gen_lowpart (HImode
, val_reg
);
29544 addr
= plus_constant (Pmode
, dst
, i
);
29545 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
29546 if ((align
& 1) == 0)
29547 emit_move_insn (mem
, reg
);
29549 emit_insn (gen_unaligned_storehi (mem
, reg
));
29554 /* Handle single byte leftover. */
29555 if (i
+ 1 == length
)
29557 reg
= gen_lowpart (QImode
, val_reg
);
29558 addr
= plus_constant (Pmode
, dst
, i
);
29559 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29560 emit_move_insn (mem
, reg
);
29566 /* Set a block of memory using vectorization instructions for both
29567 aligned and unaligned cases. We fill the first LENGTH bytes of
29568 the memory area starting from DSTBASE with byte constant VALUE.
29569 ALIGN is the alignment requirement of memory. */
29571 arm_block_set_vect (rtx dstbase
,
29572 unsigned HOST_WIDE_INT length
,
29573 unsigned HOST_WIDE_INT value
,
29574 unsigned HOST_WIDE_INT align
)
29576 /* Check whether we need to use unaligned store instruction. */
29577 if (((align
& 3) != 0 || (length
& 3) != 0)
29578 /* Check whether unaligned store instruction is available. */
29579 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
29582 if ((align
& 3) == 0)
29583 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
29585 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
29588 /* Expand string store operation. Firstly we try to do that by using
29589 vectorization instructions, then try with ARM unaligned access and
29590 double-word store if profitable. OPERANDS[0] is the destination,
29591 OPERANDS[1] is the number of bytes, operands[2] is the value to
29592 initialize the memory, OPERANDS[3] is the known alignment of the
29595 arm_gen_setmem (rtx
*operands
)
29597 rtx dstbase
= operands
[0];
29598 unsigned HOST_WIDE_INT length
;
29599 unsigned HOST_WIDE_INT value
;
29600 unsigned HOST_WIDE_INT align
;
29602 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
29605 length
= UINTVAL (operands
[1]);
29609 value
= (UINTVAL (operands
[2]) & 0xFF);
29610 align
= UINTVAL (operands
[3]);
29611 if (TARGET_NEON
&& length
>= 8
29612 && current_tune
->string_ops_prefer_neon
29613 && arm_block_set_vect (dstbase
, length
, value
, align
))
29616 if (!unaligned_access
&& (align
& 3) != 0)
29617 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
29619 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
29624 arm_macro_fusion_p (void)
29626 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
29631 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
29634 rtx prev_set
= single_set (prev
);
29635 rtx curr_set
= single_set (curr
);
29641 if (any_condjump_p (curr
))
29644 if (!arm_macro_fusion_p ())
29647 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
)
29649 /* We are trying to fuse
29650 movw imm / movt imm
29651 instructions as a group that gets scheduled together. */
29653 set_dest
= SET_DEST (curr_set
);
29655 if (GET_MODE (set_dest
) != SImode
)
29658 /* We are trying to match:
29659 prev (movw) == (set (reg r0) (const_int imm16))
29660 curr (movt) == (set (zero_extract (reg r0)
29663 (const_int imm16_1))
29665 prev (movw) == (set (reg r1)
29666 (high (symbol_ref ("SYM"))))
29667 curr (movt) == (set (reg r0)
29669 (symbol_ref ("SYM")))) */
29670 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
29672 if (CONST_INT_P (SET_SRC (curr_set
))
29673 && CONST_INT_P (SET_SRC (prev_set
))
29674 && REG_P (XEXP (set_dest
, 0))
29675 && REG_P (SET_DEST (prev_set
))
29676 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
29679 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
29680 && REG_P (SET_DEST (curr_set
))
29681 && REG_P (SET_DEST (prev_set
))
29682 && GET_CODE (SET_SRC (prev_set
)) == HIGH
29683 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
29689 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29691 static unsigned HOST_WIDE_INT
29692 arm_asan_shadow_offset (void)
29694 return (unsigned HOST_WIDE_INT
) 1 << 29;
29698 /* This is a temporary fix for PR60655. Ideally we need
29699 to handle most of these cases in the generic part but
29700 currently we reject minus (..) (sym_ref). We try to
29701 ameliorate the case with minus (sym_ref1) (sym_ref2)
29702 where they are in the same section. */
29705 arm_const_not_ok_for_debug_p (rtx p
)
29707 tree decl_op0
= NULL
;
29708 tree decl_op1
= NULL
;
29710 if (GET_CODE (p
) == MINUS
)
29712 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
29714 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
29716 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
29717 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
29719 if ((TREE_CODE (decl_op1
) == VAR_DECL
29720 || TREE_CODE (decl_op1
) == CONST_DECL
)
29721 && (TREE_CODE (decl_op0
) == VAR_DECL
29722 || TREE_CODE (decl_op0
) == CONST_DECL
))
29723 return (get_variable_section (decl_op1
, false)
29724 != get_variable_section (decl_op0
, false));
29726 if (TREE_CODE (decl_op1
) == LABEL_DECL
29727 && TREE_CODE (decl_op0
) == LABEL_DECL
)
29728 return (DECL_CONTEXT (decl_op1
)
29729 != DECL_CONTEXT (decl_op0
));
29739 /* return TRUE if x is a reference to a value in a constant pool */
29741 arm_is_constant_pool_ref (rtx x
)
29744 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
29745 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
29748 /* Remember the last target of arm_set_current_function. */
29749 static GTY(()) tree arm_previous_fndecl
;
29751 /* Invalidate arm_previous_fndecl. */
29753 arm_reset_previous_fndecl (void)
29755 arm_previous_fndecl
= NULL_TREE
;
29758 /* Establish appropriate back-end context for processing the function
29759 FNDECL. The argument might be NULL to indicate processing at top
29760 level, outside of any function scope. */
29762 arm_set_current_function (tree fndecl
)
29764 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
29767 tree old_tree
= (arm_previous_fndecl
29768 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
29771 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
29773 arm_previous_fndecl
= fndecl
;
29774 if (old_tree
== new_tree
)
29777 if (new_tree
&& new_tree
!= target_option_default_node
)
29779 cl_target_option_restore (&global_options
,
29780 TREE_TARGET_OPTION (new_tree
));
29782 if (TREE_TARGET_GLOBALS (new_tree
))
29783 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
29785 TREE_TARGET_GLOBALS (new_tree
)
29786 = save_target_globals_default_opts ();
29789 else if (old_tree
&& old_tree
!= target_option_default_node
)
29791 new_tree
= target_option_current_node
;
29793 cl_target_option_restore (&global_options
,
29794 TREE_TARGET_OPTION (new_tree
));
29795 if (TREE_TARGET_GLOBALS (new_tree
))
29796 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
29797 else if (new_tree
== target_option_default_node
)
29798 restore_target_globals (&default_target_globals
);
29800 TREE_TARGET_GLOBALS (new_tree
)
29801 = save_target_globals_default_opts ();
29804 arm_option_params_internal ();
29807 /* Implement TARGET_OPTION_PRINT. */
29810 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
29812 int flags
= ptr
->x_target_flags
;
29814 fprintf (file
, "%*sselected arch %s\n", indent
, "",
29815 TARGET_THUMB2_P (flags
) ? "thumb2" :
29816 TARGET_THUMB_P (flags
) ? "thumb1" :
29820 /* Hook to determine if one function can safely inline another. */
29823 arm_can_inline_p (tree caller ATTRIBUTE_UNUSED
, tree callee ATTRIBUTE_UNUSED
)
29825 /* Overidde default hook: Always OK to inline between different modes.
29826 Function with mode specific instructions, e.g using asm, must be explicitely
29827 protected with noinline. */
29831 /* Hook to fix function's alignment affected by target attribute. */
29834 arm_relayout_function (tree fndecl
)
29836 if (DECL_USER_ALIGN (fndecl
))
29839 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
29842 callee_tree
= target_option_default_node
;
29844 DECL_ALIGN (fndecl
) =
29845 FUNCTION_BOUNDARY_P (TREE_TARGET_OPTION (callee_tree
)->x_target_flags
);
29848 /* Inner function to process the attribute((target(...))), take an argument and
29849 set the current options from the argument. If we have a list, recursively
29850 go over the list. */
29853 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
29855 if (TREE_CODE (args
) == TREE_LIST
)
29858 for (; args
; args
= TREE_CHAIN (args
))
29859 if (TREE_VALUE (args
)
29860 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
29865 else if (TREE_CODE (args
) != STRING_CST
)
29867 error ("attribute %<target%> argument not a string");
29871 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
29872 while (argstr
&& *argstr
!= '\0')
29874 while (ISSPACE (*argstr
))
29877 if (!strcmp (argstr
, "thumb"))
29879 opts
->x_target_flags
|= MASK_THUMB
;
29880 arm_option_check_internal (opts
);
29884 if (!strcmp (argstr
, "arm"))
29886 opts
->x_target_flags
&= ~MASK_THUMB
;
29887 arm_option_check_internal (opts
);
29891 warning (0, "attribute(target(\"%s\")) is unknown", argstr
);
29898 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29901 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
29902 struct gcc_options
*opts_set
)
29904 if (!arm_valid_target_attribute_rec (args
, opts
))
29907 /* Do any overrides, such as global options arch=xxx. */
29908 arm_option_override_internal (opts
, opts_set
);
29910 return build_target_option_node (opts
);
29914 add_attribute (const char * mode
, tree
*attributes
)
29916 size_t len
= strlen (mode
);
29917 tree value
= build_string (len
, mode
);
29919 TREE_TYPE (value
) = build_array_type (char_type_node
,
29920 build_index_type (size_int (len
)));
29922 *attributes
= tree_cons (get_identifier ("target"),
29923 build_tree_list (NULL_TREE
, value
),
29927 /* For testing. Insert thumb or arm modes alternatively on functions. */
29930 arm_insert_attributes (tree fndecl
, tree
* attributes
)
29934 if (! TARGET_FLIP_THUMB
)
29937 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
29938 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
29941 /* Nested definitions must inherit mode. */
29942 if (current_function_decl
)
29944 mode
= TARGET_THUMB
? "thumb" : "arm";
29945 add_attribute (mode
, attributes
);
29949 /* If there is already a setting don't change it. */
29950 if (lookup_attribute ("target", *attributes
) != NULL
)
29953 mode
= thumb_flipper
? "thumb" : "arm";
29954 add_attribute (mode
, attributes
);
29956 thumb_flipper
= !thumb_flipper
;
29959 /* Hook to validate attribute((target("string"))). */
29962 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
29963 tree args
, int ARG_UNUSED (flags
))
29966 struct gcc_options func_options
;
29967 tree cur_tree
, new_optimize
;
29968 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
29970 /* Get the optimization options of the current function. */
29971 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
29973 /* If the function changed the optimization levels as well as setting target
29974 options, start with the optimizations specified. */
29975 if (!func_optimize
)
29976 func_optimize
= optimization_default_node
;
29978 /* Init func_options. */
29979 memset (&func_options
, 0, sizeof (func_options
));
29980 init_options_struct (&func_options
, NULL
);
29981 lang_hooks
.init_options_struct (&func_options
);
29983 /* Initialize func_options to the defaults. */
29984 cl_optimization_restore (&func_options
,
29985 TREE_OPTIMIZATION (func_optimize
));
29987 cl_target_option_restore (&func_options
,
29988 TREE_TARGET_OPTION (target_option_default_node
));
29990 /* Set func_options flags with new target mode. */
29991 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
29992 &global_options_set
);
29994 if (cur_tree
== NULL_TREE
)
29997 new_optimize
= build_optimization_node (&func_options
);
29999 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30001 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30007 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
30009 if (TARGET_UNIFIED_ASM
)
30010 fprintf (stream
, "\t.syntax unified\n");
30012 fprintf (stream
, "\t.syntax divided\n");
30016 if (is_called_in_ARM_mode (decl
)
30017 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
30018 && cfun
->is_thunk
))
30019 fprintf (stream
, "\t.code 32\n");
30020 else if (TARGET_THUMB1
)
30021 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
30023 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
30026 fprintf (stream
, "\t.arm\n");
30028 if (TARGET_POKE_FUNCTION_NAME
)
30029 arm_poke_function_name (stream
, (const char *) name
);
30032 /* If MEM is in the form of [base+offset], extract the two parts
30033 of address and set to BASE and OFFSET, otherwise return false
30034 after clearing BASE and OFFSET. */
30037 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
30041 gcc_assert (MEM_P (mem
));
30043 addr
= XEXP (mem
, 0);
30045 /* Strip off const from addresses like (const (addr)). */
30046 if (GET_CODE (addr
) == CONST
)
30047 addr
= XEXP (addr
, 0);
30049 if (GET_CODE (addr
) == REG
)
30052 *offset
= const0_rtx
;
30056 if (GET_CODE (addr
) == PLUS
30057 && GET_CODE (XEXP (addr
, 0)) == REG
30058 && CONST_INT_P (XEXP (addr
, 1)))
30060 *base
= XEXP (addr
, 0);
30061 *offset
= XEXP (addr
, 1);
30066 *offset
= NULL_RTX
;
30071 /* If INSN is a load or store of address in the form of [base+offset],
30072 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30073 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30074 otherwise return FALSE. */
30077 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
30081 gcc_assert (INSN_P (insn
));
30082 x
= PATTERN (insn
);
30083 if (GET_CODE (x
) != SET
)
30087 dest
= SET_DEST (x
);
30088 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
30091 extract_base_offset_in_addr (dest
, base
, offset
);
30093 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
30096 extract_base_offset_in_addr (src
, base
, offset
);
30101 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
30104 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30106 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30107 and PRI are only calculated for these instructions. For other instruction,
30108 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30109 instruction fusion can be supported by returning different priorities.
30111 It's important that irrelevant instructions get the largest FUSION_PRI. */
30114 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
30115 int *fusion_pri
, int *pri
)
30121 gcc_assert (INSN_P (insn
));
30124 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
30131 /* Load goes first. */
30133 *fusion_pri
= tmp
- 1;
30135 *fusion_pri
= tmp
- 2;
30139 /* INSN with smaller base register goes first. */
30140 tmp
-= ((REGNO (base
) & 0xff) << 20);
30142 /* INSN with smaller offset goes first. */
30143 off_val
= (int)(INTVAL (offset
));
30145 tmp
-= (off_val
& 0xfffff);
30147 tmp
+= ((- off_val
) & 0xfffff);
30152 #include "gt-arm.h"