1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
34 #include "stringpool.h"
40 #include "diagnostic-core.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
61 #include "target-globals.h"
63 #include "tm-constrs.h"
65 #include "optabs-libfuncs.h"
67 /* This file should be included last. */
68 #include "target-def.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook for emitting language-specific object attributes; NULL unless a
   front end installs one (NOTE(review): set externally — confirm callers).  */
void (*arm_lang_output_object_attributes_hook)(void);
81 /* Forward function declarations. */
82 static bool arm_const_not_ok_for_debug_p (rtx
);
83 static bool arm_needs_doubleword_align (machine_mode
, const_tree
);
84 static int arm_compute_static_chain_stack_bytes (void);
85 static arm_stack_offsets
*arm_get_frame_offsets (void);
86 static void arm_add_gc_roots (void);
87 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
88 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
89 static unsigned bit_count (unsigned long);
90 static unsigned feature_count (const arm_feature_set
*);
91 static int arm_address_register_rtx_p (rtx
, int);
92 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
93 static bool is_called_in_ARM_mode (tree
);
94 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
95 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
96 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
97 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
98 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
99 inline static int thumb1_index_register_rtx_p (rtx
, int);
100 static int thumb_far_jump_used_p (void);
101 static bool thumb_force_lr_save (void);
102 static unsigned arm_size_return_regs (void);
103 static bool arm_assemble_integer (rtx
, unsigned int, int);
104 static void arm_print_operand (FILE *, rtx
, int);
105 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
106 static bool arm_print_operand_punct_valid_p (unsigned char code
);
107 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
108 static arm_cc
get_arm_condition_code (rtx
);
109 static const char *output_multi_immediate (rtx
*, const char *, const char *,
111 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
112 static struct machine_function
*arm_init_machine_status (void);
113 static void thumb_exit (FILE *, int);
114 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
115 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
116 static Mnode
*add_minipool_forward_ref (Mfix
*);
117 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
118 static Mnode
*add_minipool_backward_ref (Mfix
*);
119 static void assign_minipool_offsets (Mfix
*);
120 static void arm_print_value (FILE *, rtx
);
121 static void dump_minipool (rtx_insn
*);
122 static int arm_barrier_cost (rtx_insn
*);
123 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
124 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
125 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
127 static void arm_reorg (void);
128 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
129 static unsigned long arm_compute_save_reg0_reg12_mask (void);
130 static unsigned long arm_compute_save_reg_mask (void);
131 static unsigned long arm_isr_value (tree
);
132 static unsigned long arm_compute_func_type (void);
133 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
134 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
135 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
136 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
137 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
139 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
140 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
141 static int arm_comp_type_attributes (const_tree
, const_tree
);
142 static void arm_set_default_type_attributes (tree
);
143 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
144 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
145 static int optimal_immediate_sequence (enum rtx_code code
,
146 unsigned HOST_WIDE_INT val
,
147 struct four_ints
*return_sequence
);
148 static int optimal_immediate_sequence_1 (enum rtx_code code
,
149 unsigned HOST_WIDE_INT val
,
150 struct four_ints
*return_sequence
,
152 static int arm_get_strip_length (int);
153 static bool arm_function_ok_for_sibcall (tree
, tree
);
154 static machine_mode
arm_promote_function_mode (const_tree
,
157 static bool arm_return_in_memory (const_tree
, const_tree
);
158 static rtx
arm_function_value (const_tree
, const_tree
, bool);
159 static rtx
arm_libcall_value_1 (machine_mode
);
160 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
161 static bool arm_function_value_regno_p (const unsigned int);
162 static void arm_internal_label (FILE *, const char *, unsigned long);
163 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
165 static bool arm_have_conditional_execution (void);
166 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
167 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
168 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
169 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
170 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
171 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
172 static void emit_constant_insn (rtx cond
, rtx pattern
);
173 static rtx_insn
*emit_set_insn (rtx
, rtx
);
174 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
175 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
177 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
179 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
181 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
182 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
184 static rtx
aapcs_libcall_value (machine_mode
);
185 static int aapcs_select_return_coproc (const_tree
, const_tree
);
187 #ifdef OBJECT_FORMAT_ELF
188 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
189 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
192 static void arm_encode_section_info (tree
, rtx
, int);
195 static void arm_file_end (void);
196 static void arm_file_start (void);
197 static void arm_insert_attributes (tree
, tree
*);
199 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
201 static bool arm_pass_by_reference (cumulative_args_t
,
202 machine_mode
, const_tree
, bool);
203 static bool arm_promote_prototypes (const_tree
);
204 static bool arm_default_short_enums (void);
205 static bool arm_align_anon_bitfield (void);
206 static bool arm_return_in_msb (const_tree
);
207 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
208 static bool arm_return_in_memory (const_tree
, const_tree
);
210 static void arm_unwind_emit (FILE *, rtx_insn
*);
211 static bool arm_output_ttype (rtx
);
212 static void arm_asm_emit_except_personality (rtx
);
214 static void arm_asm_init_sections (void);
215 static rtx
arm_dwarf_register_span (rtx
);
217 static tree
arm_cxx_guard_type (void);
218 static bool arm_cxx_guard_mask_bit (void);
219 static tree
arm_get_cookie_size (tree
);
220 static bool arm_cookie_has_size (void);
221 static bool arm_cxx_cdtor_returns_this (void);
222 static bool arm_cxx_key_method_may_be_inline (void);
223 static void arm_cxx_determine_class_data_visibility (tree
);
224 static bool arm_cxx_class_data_always_comdat (void);
225 static bool arm_cxx_use_aeabi_atexit (void);
226 static void arm_init_libfuncs (void);
227 static tree
arm_build_builtin_va_list (void);
228 static void arm_expand_builtin_va_start (tree
, rtx
);
229 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
230 static void arm_option_override (void);
231 static void arm_override_options_after_change (void);
232 static void arm_option_print (FILE *, int, struct cl_target_option
*);
233 static void arm_set_current_function (tree
);
234 static bool arm_can_inline_p (tree
, tree
);
235 static void arm_relayout_function (tree
);
236 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
237 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
238 static bool arm_macro_fusion_p (void);
239 static bool arm_cannot_copy_insn_p (rtx_insn
*);
240 static int arm_issue_rate (void);
241 static int arm_first_cycle_multipass_dfa_lookahead (void);
242 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
243 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
244 static bool arm_output_addr_const_extra (FILE *, rtx
);
245 static bool arm_allocate_stack_slots_for_args (void);
246 static bool arm_warn_func_return (tree
);
247 static tree
arm_promoted_type (const_tree t
);
248 static tree
arm_convert_to_type (tree type
, tree expr
);
249 static bool arm_scalar_mode_supported_p (machine_mode
);
250 static bool arm_frame_pointer_required (void);
251 static bool arm_can_eliminate (const int, const int);
252 static void arm_asm_trampoline_template (FILE *);
253 static void arm_trampoline_init (rtx
, tree
, rtx
);
254 static rtx
arm_trampoline_adjust_address (rtx
);
255 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
256 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
257 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
258 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
259 static bool arm_array_mode_supported_p (machine_mode
,
260 unsigned HOST_WIDE_INT
);
261 static machine_mode
arm_preferred_simd_mode (machine_mode
);
262 static bool arm_class_likely_spilled_p (reg_class_t
);
263 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
264 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
265 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
269 static void arm_conditional_register_usage (void);
270 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
271 static unsigned int arm_autovectorize_vector_sizes (void);
272 static int arm_default_branch_cost (bool, bool);
273 static int arm_cortex_a5_branch_cost (bool, bool);
274 static int arm_cortex_m_branch_cost (bool, bool);
275 static int arm_cortex_m7_branch_cost (bool, bool);
277 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
278 const unsigned char *sel
);
280 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
282 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
284 int misalign ATTRIBUTE_UNUSED
);
285 static unsigned arm_add_stmt_cost (void *data
, int count
,
286 enum vect_cost_for_stmt kind
,
287 struct _stmt_vec_info
*stmt_info
,
289 enum vect_cost_model_location where
);
291 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
292 bool op0_preserve_value
);
293 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
295 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
296 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
298 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
299 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
300 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
302 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
304 /* Table of machine attributes. */
305 static const struct attribute_spec arm_attribute_table
[] =
307 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
308 affects_type_identity } */
309 /* Function calls made to this symbol must be done indirectly, because
310 it may lie outside of the 26 bit addressing range of a normal function
312 { "long_call", 0, 0, false, true, true, NULL
, false },
313 /* Whereas these functions are always known to reside within the 26 bit
315 { "short_call", 0, 0, false, true, true, NULL
, false },
316 /* Specify the procedure call conventions for a function. */
317 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
319 /* Interrupt Service Routines have special prologue and epilogue requirements. */
320 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
322 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
324 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
327 /* ARM/PE has three new attributes:
329 dllexport - for exporting a function/variable that will live in a dll
330 dllimport - for importing a function/variable from a dll
332 Microsoft allows multiple declspecs in one __declspec, separating
333 them with spaces. We do NOT support this. Instead, use __declspec
336 { "dllimport", 0, 0, true, false, false, NULL
, false },
337 { "dllexport", 0, 0, true, false, false, NULL
, false },
338 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
340 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
343 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
346 { NULL
, 0, 0, false, false, false, NULL
, false }
349 /* Initialize the GCC target structure. */
350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
351 #undef TARGET_MERGE_DECL_ATTRIBUTES
352 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
355 #undef TARGET_LEGITIMIZE_ADDRESS
356 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
358 #undef TARGET_ATTRIBUTE_TABLE
359 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
361 #undef TARGET_INSERT_ATTRIBUTES
362 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
364 #undef TARGET_ASM_FILE_START
365 #define TARGET_ASM_FILE_START arm_file_start
366 #undef TARGET_ASM_FILE_END
367 #define TARGET_ASM_FILE_END arm_file_end
369 #undef TARGET_ASM_ALIGNED_SI_OP
370 #define TARGET_ASM_ALIGNED_SI_OP NULL
371 #undef TARGET_ASM_INTEGER
372 #define TARGET_ASM_INTEGER arm_assemble_integer
374 #undef TARGET_PRINT_OPERAND
375 #define TARGET_PRINT_OPERAND arm_print_operand
376 #undef TARGET_PRINT_OPERAND_ADDRESS
377 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
378 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
379 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
381 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
382 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
384 #undef TARGET_ASM_FUNCTION_PROLOGUE
385 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
387 #undef TARGET_ASM_FUNCTION_EPILOGUE
388 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
390 #undef TARGET_CAN_INLINE_P
391 #define TARGET_CAN_INLINE_P arm_can_inline_p
393 #undef TARGET_RELAYOUT_FUNCTION
394 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
396 #undef TARGET_OPTION_OVERRIDE
397 #define TARGET_OPTION_OVERRIDE arm_option_override
399 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
400 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
402 #undef TARGET_OPTION_PRINT
403 #define TARGET_OPTION_PRINT arm_option_print
405 #undef TARGET_COMP_TYPE_ATTRIBUTES
406 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
408 #undef TARGET_SCHED_MACRO_FUSION_P
409 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
411 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
412 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
414 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
415 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
417 #undef TARGET_SCHED_ADJUST_COST
418 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
420 #undef TARGET_SET_CURRENT_FUNCTION
421 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
423 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
424 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
426 #undef TARGET_SCHED_REORDER
427 #define TARGET_SCHED_REORDER arm_sched_reorder
429 #undef TARGET_REGISTER_MOVE_COST
430 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
432 #undef TARGET_MEMORY_MOVE_COST
433 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
435 #undef TARGET_ENCODE_SECTION_INFO
437 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
439 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
442 #undef TARGET_STRIP_NAME_ENCODING
443 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
445 #undef TARGET_ASM_INTERNAL_LABEL
446 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
448 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
449 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
451 #undef TARGET_FUNCTION_VALUE
452 #define TARGET_FUNCTION_VALUE arm_function_value
454 #undef TARGET_LIBCALL_VALUE
455 #define TARGET_LIBCALL_VALUE arm_libcall_value
457 #undef TARGET_FUNCTION_VALUE_REGNO_P
458 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
460 #undef TARGET_ASM_OUTPUT_MI_THUNK
461 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
462 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
463 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
465 #undef TARGET_RTX_COSTS
466 #define TARGET_RTX_COSTS arm_rtx_costs
467 #undef TARGET_ADDRESS_COST
468 #define TARGET_ADDRESS_COST arm_address_cost
470 #undef TARGET_SHIFT_TRUNCATION_MASK
471 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
472 #undef TARGET_VECTOR_MODE_SUPPORTED_P
473 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
474 #undef TARGET_ARRAY_MODE_SUPPORTED_P
475 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
476 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
477 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
478 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
479 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
480 arm_autovectorize_vector_sizes
482 #undef TARGET_MACHINE_DEPENDENT_REORG
483 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
485 #undef TARGET_INIT_BUILTINS
486 #define TARGET_INIT_BUILTINS arm_init_builtins
487 #undef TARGET_EXPAND_BUILTIN
488 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
489 #undef TARGET_BUILTIN_DECL
490 #define TARGET_BUILTIN_DECL arm_builtin_decl
492 #undef TARGET_INIT_LIBFUNCS
493 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
495 #undef TARGET_PROMOTE_FUNCTION_MODE
496 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
497 #undef TARGET_PROMOTE_PROTOTYPES
498 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
499 #undef TARGET_PASS_BY_REFERENCE
500 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
501 #undef TARGET_ARG_PARTIAL_BYTES
502 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
503 #undef TARGET_FUNCTION_ARG
504 #define TARGET_FUNCTION_ARG arm_function_arg
505 #undef TARGET_FUNCTION_ARG_ADVANCE
506 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
507 #undef TARGET_FUNCTION_ARG_BOUNDARY
508 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
510 #undef TARGET_SETUP_INCOMING_VARARGS
511 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
513 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
514 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
516 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
517 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
518 #undef TARGET_TRAMPOLINE_INIT
519 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
520 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
521 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
523 #undef TARGET_WARN_FUNC_RETURN
524 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
526 #undef TARGET_DEFAULT_SHORT_ENUMS
527 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
529 #undef TARGET_ALIGN_ANON_BITFIELD
530 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
532 #undef TARGET_NARROW_VOLATILE_BITFIELD
533 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
535 #undef TARGET_CXX_GUARD_TYPE
536 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
538 #undef TARGET_CXX_GUARD_MASK_BIT
539 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
541 #undef TARGET_CXX_GET_COOKIE_SIZE
542 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
544 #undef TARGET_CXX_COOKIE_HAS_SIZE
545 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
547 #undef TARGET_CXX_CDTOR_RETURNS_THIS
548 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
550 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
551 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
553 #undef TARGET_CXX_USE_AEABI_ATEXIT
554 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
556 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
557 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
558 arm_cxx_determine_class_data_visibility
560 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
561 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
563 #undef TARGET_RETURN_IN_MSB
564 #define TARGET_RETURN_IN_MSB arm_return_in_msb
566 #undef TARGET_RETURN_IN_MEMORY
567 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
569 #undef TARGET_MUST_PASS_IN_STACK
570 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
573 #undef TARGET_ASM_UNWIND_EMIT
574 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
576 /* EABI unwinding tables use a different format for the typeinfo tables. */
577 #undef TARGET_ASM_TTYPE
578 #define TARGET_ASM_TTYPE arm_output_ttype
580 #undef TARGET_ARM_EABI_UNWINDER
581 #define TARGET_ARM_EABI_UNWINDER true
583 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
584 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
586 #undef TARGET_ASM_INIT_SECTIONS
587 #endif /* ARM_UNWIND_INFO */
588 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
590 #undef TARGET_DWARF_REGISTER_SPAN
591 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
593 #undef TARGET_CANNOT_COPY_INSN_P
594 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
597 #undef TARGET_HAVE_TLS
598 #define TARGET_HAVE_TLS true
601 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
602 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
604 #undef TARGET_LEGITIMATE_CONSTANT_P
605 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
607 #undef TARGET_CANNOT_FORCE_CONST_MEM
608 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
610 #undef TARGET_MAX_ANCHOR_OFFSET
611 #define TARGET_MAX_ANCHOR_OFFSET 4095
613 /* The minimum is set such that the total size of the block
614 for a particular anchor is -4088 + 1 + 4095 bytes, which is
615 divisible by eight, ensuring natural spacing of anchors. */
616 #undef TARGET_MIN_ANCHOR_OFFSET
617 #define TARGET_MIN_ANCHOR_OFFSET -4088
619 #undef TARGET_SCHED_ISSUE_RATE
620 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
622 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
623 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
624 arm_first_cycle_multipass_dfa_lookahead
626 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
627 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
628 arm_first_cycle_multipass_dfa_lookahead_guard
630 #undef TARGET_MANGLE_TYPE
631 #define TARGET_MANGLE_TYPE arm_mangle_type
633 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
634 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
636 #undef TARGET_BUILD_BUILTIN_VA_LIST
637 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
638 #undef TARGET_EXPAND_BUILTIN_VA_START
639 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
640 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
641 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
644 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
645 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
648 #undef TARGET_LEGITIMATE_ADDRESS_P
649 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
651 #undef TARGET_PREFERRED_RELOAD_CLASS
652 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
654 #undef TARGET_PROMOTED_TYPE
655 #define TARGET_PROMOTED_TYPE arm_promoted_type
657 #undef TARGET_CONVERT_TO_TYPE
658 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
660 #undef TARGET_SCALAR_MODE_SUPPORTED_P
661 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
663 #undef TARGET_FRAME_POINTER_REQUIRED
664 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
666 #undef TARGET_CAN_ELIMINATE
667 #define TARGET_CAN_ELIMINATE arm_can_eliminate
669 #undef TARGET_CONDITIONAL_REGISTER_USAGE
670 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
672 #undef TARGET_CLASS_LIKELY_SPILLED_P
673 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
675 #undef TARGET_VECTORIZE_BUILTINS
676 #define TARGET_VECTORIZE_BUILTINS
678 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
679 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
680 arm_builtin_vectorized_function
682 #undef TARGET_VECTOR_ALIGNMENT
683 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
685 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
686 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
687 arm_vector_alignment_reachable
689 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
690 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
691 arm_builtin_support_vector_misalignment
693 #undef TARGET_PREFERRED_RENAME_CLASS
694 #define TARGET_PREFERRED_RENAME_CLASS \
695 arm_preferred_rename_class
697 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
698 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
699 arm_vectorize_vec_perm_const_ok
701 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
702 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
703 arm_builtin_vectorization_cost
704 #undef TARGET_VECTORIZE_ADD_STMT_COST
705 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
707 #undef TARGET_CANONICALIZE_COMPARISON
708 #define TARGET_CANONICALIZE_COMPARISON \
709 arm_canonicalize_comparison
711 #undef TARGET_ASAN_SHADOW_OFFSET
712 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
714 #undef MAX_INSN_PER_IT_BLOCK
715 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
717 #undef TARGET_CAN_USE_DOLOOP_P
718 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
720 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
721 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
723 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
724 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
726 #undef TARGET_SCHED_FUSION_PRIORITY
727 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
729 #undef TARGET_ASM_FUNCTION_SECTION
730 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
732 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
733 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
735 #undef TARGET_SECTION_TYPE_FLAGS
736 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
738 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
739 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
741 struct gcc_target targetm
= TARGET_INITIALIZER
;
743 /* Obstack for minipool constant handling. */
744 static struct obstack minipool_obstack
;
745 static char * minipool_startobj
;
/* The maximum number of insns skipped which will be conditionalised if
   possible.  */
static int max_insns_skipped = 5;

/* Assembly output stream, defined in the shared output machinery.  */
extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;
756 /* The processor for which instructions should be scheduled. */
757 enum processor_type arm_tune
= arm_none
;
/* The current tuning set.  */
const struct tune_params *current_tune;
762 /* Which floating point hardware to schedule for. */
765 /* Used for Thumb call_via trampolines. */
766 rtx thumb_call_via_label
[14];
767 static int thumb_call_reg_needed
;
769 /* The bits in this mask specify which
770 instructions we are allowed to generate. */
771 arm_feature_set insn_flags
= ARM_FSET_EMPTY
;
773 /* The bits in this mask specify which instruction scheduling options should
775 arm_feature_set tune_flags
= ARM_FSET_EMPTY
;
777 /* The highest ARM architecture version supported by the
779 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
781 /* The following are used in the arm.md file as equivalents to bits
782 in the above two flag variables. */
784 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
787 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
790 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
793 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
796 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
799 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
802 /* Nonzero if this chip supports the ARM 6K extensions. */
805 /* Nonzero if this chip supports the ARM 6KZ extensions. */
808 /* Nonzero if instructions present in ARMv6-M can be used. */
811 /* Nonzero if this chip supports the ARM 7 extensions. */
/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
820 /* Nonzero if instructions present in ARMv8 can be used. */
823 /* Nonzero if this chip supports the ARMv8.1 extensions. */
826 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;
/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
865 /* Nonzero if chip supports Thumb 1. */
868 /* Nonzero if chip supports Thumb 2. */
871 /* Nonzero if chip supports integer division instruction. */
872 int arm_arch_arm_hwdiv
;
873 int arm_arch_thumb_hwdiv
;
875 /* Nonzero if chip disallows volatile memory access in IT block. */
876 int arm_arch_no_volatile_ce
;
878 /* Nonzero if we should use Neon to handle 64-bits operations rather
879 than core registers. */
880 int prefer_neon_for_64bits
= 0;
882 /* Nonzero if we shouldn't use literal pools. */
883 bool arm_disable_literal_pool
= false;
885 /* The register number to be used for the PIC offset register. */
886 unsigned arm_pic_register
= INVALID_REGNUM
;
888 enum arm_pcs arm_pcs_default
;
890 /* For an explanation of these variables, see final_prescan_insn below. */
892 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
893 enum arm_cond_code arm_current_cc
;
896 int arm_target_label
;
897 /* The number of conditionally executed insns, including the current insn. */
898 int arm_condexec_count
= 0;
899 /* A bitmask specifying the patterns for the IT block.
900 Zero means do not output an IT block before this insn. */
901 int arm_condexec_mask
= 0;
902 /* The number of bits used in arm_condexec_mask. */
903 int arm_condexec_masklen
= 0;
905 /* Nonzero if chip supports the ARMv8 CRC instructions. */
906 int arm_arch_crc
= 0;
908 /* Nonzero if the core has a very small, high-latency, multiply unit. */
909 int arm_m_profile_small_mul
= 0;
911 /* The condition codes of the ARM, and the inverse function. */
912 static const char * const arm_condition_codes
[] =
914 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
915 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
918 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
919 int arm_regs_in_sequence
[] =
921 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
924 #define ARM_LSL_NAME "lsl"
925 #define streq(string1, string2) (strcmp (string1, string2) == 0)
927 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
928 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
929 | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Fragment of the per-CPU description table type (fields of the
   `processors' struct: name, core, base architecture, feature flags,
   tuning parameters) plus the prefetch-parameter helper macros.
   NOTE(review): the struct header/closer and the continuation line of
   ARM_PREFETCH_BENEFICIAL appear to have been lost during extraction;
   lines kept verbatim.  */
931 /* Initialization code. */
935 const char *const name
;
936 enum processor_type core
;
938 enum base_architecture base_arch
;
939 const arm_feature_set flags
;
940 const struct tune_params
*const tune
;
944 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
945 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
952 /* arm generic vectorizer costs. */
954 struct cpu_vec_costs arm_default_vec_cost
= {
955 1, /* scalar_stmt_cost. */
956 1, /* scalar load_cost. */
957 1, /* scalar_store_cost. */
958 1, /* vec_stmt_cost. */
959 1, /* vec_to_scalar_cost. */
960 1, /* scalar_to_vec_cost. */
961 1, /* vec_align_load_cost. */
962 1, /* vec_unalign_load_cost. */
963 1, /* vec_unalign_store_cost. */
964 1, /* vec_store_cost. */
965 3, /* cond_taken_branch_cost. */
966 1, /* cond_not_taken_branch_cost. */
969 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
970 #include "aarch-cost-tables.h"
/* Per-instruction-class RTX cost table for the Cortex-A9 pipeline.
   NOTE(review): the nested-brace structure and several member entries of
   this initializer appear to have been lost during extraction; the
   surviving entries are kept verbatim — do not compile as-is.  */
974 const struct cpu_cost_table cortexa9_extra_costs
=
981 COSTS_N_INSNS (1), /* shift_reg. */
982 COSTS_N_INSNS (1), /* arith_shift. */
983 COSTS_N_INSNS (2), /* arith_shift_reg. */
985 COSTS_N_INSNS (1), /* log_shift_reg. */
986 COSTS_N_INSNS (1), /* extend. */
987 COSTS_N_INSNS (2), /* extend_arith. */
988 COSTS_N_INSNS (1), /* bfi. */
989 COSTS_N_INSNS (1), /* bfx. */
993 true /* non_exec_costs_exec. */
998 COSTS_N_INSNS (3), /* simple. */
999 COSTS_N_INSNS (3), /* flag_setting. */
1000 COSTS_N_INSNS (2), /* extend. */
1001 COSTS_N_INSNS (3), /* add. */
1002 COSTS_N_INSNS (2), /* extend_add. */
1003 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1007 0, /* simple (N/A). */
1008 0, /* flag_setting (N/A). */
1009 COSTS_N_INSNS (4), /* extend. */
1011 COSTS_N_INSNS (4), /* extend_add. */
1017 COSTS_N_INSNS (2), /* load. */
1018 COSTS_N_INSNS (2), /* load_sign_extend. */
1019 COSTS_N_INSNS (2), /* ldrd. */
1020 COSTS_N_INSNS (2), /* ldm_1st. */
1021 1, /* ldm_regs_per_insn_1st. */
1022 2, /* ldm_regs_per_insn_subsequent. */
1023 COSTS_N_INSNS (5), /* loadf. */
1024 COSTS_N_INSNS (5), /* loadd. */
1025 COSTS_N_INSNS (1), /* load_unaligned. */
1026 COSTS_N_INSNS (2), /* store. */
1027 COSTS_N_INSNS (2), /* strd. */
1028 COSTS_N_INSNS (2), /* stm_1st. */
1029 1, /* stm_regs_per_insn_1st. */
1030 2, /* stm_regs_per_insn_subsequent. */
1031 COSTS_N_INSNS (1), /* storef. */
1032 COSTS_N_INSNS (1), /* stored. */
1033 COSTS_N_INSNS (1), /* store_unaligned. */
1034 COSTS_N_INSNS (1), /* loadv. */
1035 COSTS_N_INSNS (1) /* storev. */
1040 COSTS_N_INSNS (14), /* div. */
1041 COSTS_N_INSNS (4), /* mult. */
1042 COSTS_N_INSNS (7), /* mult_addsub. */
1043 COSTS_N_INSNS (30), /* fma. */
1044 COSTS_N_INSNS (3), /* addsub. */
1045 COSTS_N_INSNS (1), /* fpconst. */
1046 COSTS_N_INSNS (1), /* neg. */
1047 COSTS_N_INSNS (3), /* compare. */
1048 COSTS_N_INSNS (3), /* widen. */
1049 COSTS_N_INSNS (3), /* narrow. */
1050 COSTS_N_INSNS (3), /* toint. */
1051 COSTS_N_INSNS (3), /* fromint. */
1052 COSTS_N_INSNS (3) /* roundint. */
1056 COSTS_N_INSNS (24), /* div. */
1057 COSTS_N_INSNS (5), /* mult. */
1058 COSTS_N_INSNS (8), /* mult_addsub. */
1059 COSTS_N_INSNS (30), /* fma. */
1060 COSTS_N_INSNS (3), /* addsub. */
1061 COSTS_N_INSNS (1), /* fpconst. */
1062 COSTS_N_INSNS (1), /* neg. */
1063 COSTS_N_INSNS (3), /* compare. */
1064 COSTS_N_INSNS (3), /* widen. */
1065 COSTS_N_INSNS (3), /* narrow. */
1066 COSTS_N_INSNS (3), /* toint. */
1067 COSTS_N_INSNS (3), /* fromint. */
1068 COSTS_N_INSNS (3) /* roundint. */
1073 COSTS_N_INSNS (1) /* alu. */
/* Per-instruction-class RTX cost table for the Cortex-A8 pipeline.
   NOTE(review): the nested-brace structure and several member entries of
   this initializer appear to have been lost during extraction; the
   surviving entries are kept verbatim — do not compile as-is.  */
1077 const struct cpu_cost_table cortexa8_extra_costs
=
1083 COSTS_N_INSNS (1), /* shift. */
1085 COSTS_N_INSNS (1), /* arith_shift. */
1086 0, /* arith_shift_reg. */
1087 COSTS_N_INSNS (1), /* log_shift. */
1088 0, /* log_shift_reg. */
1090 0, /* extend_arith. */
1096 true /* non_exec_costs_exec. */
1101 COSTS_N_INSNS (1), /* simple. */
1102 COSTS_N_INSNS (1), /* flag_setting. */
1103 COSTS_N_INSNS (1), /* extend. */
1104 COSTS_N_INSNS (1), /* add. */
1105 COSTS_N_INSNS (1), /* extend_add. */
1106 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1110 0, /* simple (N/A). */
1111 0, /* flag_setting (N/A). */
1112 COSTS_N_INSNS (2), /* extend. */
1114 COSTS_N_INSNS (2), /* extend_add. */
1120 COSTS_N_INSNS (1), /* load. */
1121 COSTS_N_INSNS (1), /* load_sign_extend. */
1122 COSTS_N_INSNS (1), /* ldrd. */
1123 COSTS_N_INSNS (1), /* ldm_1st. */
1124 1, /* ldm_regs_per_insn_1st. */
1125 2, /* ldm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (1), /* loadf. */
1127 COSTS_N_INSNS (1), /* loadd. */
1128 COSTS_N_INSNS (1), /* load_unaligned. */
1129 COSTS_N_INSNS (1), /* store. */
1130 COSTS_N_INSNS (1), /* strd. */
1131 COSTS_N_INSNS (1), /* stm_1st. */
1132 1, /* stm_regs_per_insn_1st. */
1133 2, /* stm_regs_per_insn_subsequent. */
1134 COSTS_N_INSNS (1), /* storef. */
1135 COSTS_N_INSNS (1), /* stored. */
1136 COSTS_N_INSNS (1), /* store_unaligned. */
1137 COSTS_N_INSNS (1), /* loadv. */
1138 COSTS_N_INSNS (1) /* storev. */
1143 COSTS_N_INSNS (36), /* div. */
1144 COSTS_N_INSNS (11), /* mult. */
1145 COSTS_N_INSNS (20), /* mult_addsub. */
1146 COSTS_N_INSNS (30), /* fma. */
1147 COSTS_N_INSNS (9), /* addsub. */
1148 COSTS_N_INSNS (3), /* fpconst. */
1149 COSTS_N_INSNS (3), /* neg. */
1150 COSTS_N_INSNS (6), /* compare. */
1151 COSTS_N_INSNS (4), /* widen. */
1152 COSTS_N_INSNS (4), /* narrow. */
1153 COSTS_N_INSNS (8), /* toint. */
1154 COSTS_N_INSNS (8), /* fromint. */
1155 COSTS_N_INSNS (8) /* roundint. */
1159 COSTS_N_INSNS (64), /* div. */
1160 COSTS_N_INSNS (16), /* mult. */
1161 COSTS_N_INSNS (25), /* mult_addsub. */
1162 COSTS_N_INSNS (30), /* fma. */
1163 COSTS_N_INSNS (9), /* addsub. */
1164 COSTS_N_INSNS (3), /* fpconst. */
1165 COSTS_N_INSNS (3), /* neg. */
1166 COSTS_N_INSNS (6), /* compare. */
1167 COSTS_N_INSNS (6), /* widen. */
1168 COSTS_N_INSNS (6), /* narrow. */
1169 COSTS_N_INSNS (8), /* toint. */
1170 COSTS_N_INSNS (8), /* fromint. */
1171 COSTS_N_INSNS (8) /* roundint. */
1176 COSTS_N_INSNS (1) /* alu. */
/* Per-instruction-class RTX cost table for the Cortex-A5 pipeline.
   NOTE(review): the nested-brace structure and several member entries of
   this initializer appear to have been lost during extraction; the
   surviving entries are kept verbatim — do not compile as-is.  */
1180 const struct cpu_cost_table cortexa5_extra_costs
=
1186 COSTS_N_INSNS (1), /* shift. */
1187 COSTS_N_INSNS (1), /* shift_reg. */
1188 COSTS_N_INSNS (1), /* arith_shift. */
1189 COSTS_N_INSNS (1), /* arith_shift_reg. */
1190 COSTS_N_INSNS (1), /* log_shift. */
1191 COSTS_N_INSNS (1), /* log_shift_reg. */
1192 COSTS_N_INSNS (1), /* extend. */
1193 COSTS_N_INSNS (1), /* extend_arith. */
1194 COSTS_N_INSNS (1), /* bfi. */
1195 COSTS_N_INSNS (1), /* bfx. */
1196 COSTS_N_INSNS (1), /* clz. */
1197 COSTS_N_INSNS (1), /* rev. */
1199 true /* non_exec_costs_exec. */
1206 COSTS_N_INSNS (1), /* flag_setting. */
1207 COSTS_N_INSNS (1), /* extend. */
1208 COSTS_N_INSNS (1), /* add. */
1209 COSTS_N_INSNS (1), /* extend_add. */
1210 COSTS_N_INSNS (7) /* idiv. */
1214 0, /* simple (N/A). */
1215 0, /* flag_setting (N/A). */
1216 COSTS_N_INSNS (1), /* extend. */
1218 COSTS_N_INSNS (2), /* extend_add. */
1224 COSTS_N_INSNS (1), /* load. */
1225 COSTS_N_INSNS (1), /* load_sign_extend. */
1226 COSTS_N_INSNS (6), /* ldrd. */
1227 COSTS_N_INSNS (1), /* ldm_1st. */
1228 1, /* ldm_regs_per_insn_1st. */
1229 2, /* ldm_regs_per_insn_subsequent. */
1230 COSTS_N_INSNS (2), /* loadf. */
1231 COSTS_N_INSNS (4), /* loadd. */
1232 COSTS_N_INSNS (1), /* load_unaligned. */
1233 COSTS_N_INSNS (1), /* store. */
1234 COSTS_N_INSNS (3), /* strd. */
1235 COSTS_N_INSNS (1), /* stm_1st. */
1236 1, /* stm_regs_per_insn_1st. */
1237 2, /* stm_regs_per_insn_subsequent. */
1238 COSTS_N_INSNS (2), /* storef. */
1239 COSTS_N_INSNS (2), /* stored. */
1240 COSTS_N_INSNS (1), /* store_unaligned. */
1241 COSTS_N_INSNS (1), /* loadv. */
1242 COSTS_N_INSNS (1) /* storev. */
1247 COSTS_N_INSNS (15), /* div. */
1248 COSTS_N_INSNS (3), /* mult. */
1249 COSTS_N_INSNS (7), /* mult_addsub. */
1250 COSTS_N_INSNS (7), /* fma. */
1251 COSTS_N_INSNS (3), /* addsub. */
1252 COSTS_N_INSNS (3), /* fpconst. */
1253 COSTS_N_INSNS (3), /* neg. */
1254 COSTS_N_INSNS (3), /* compare. */
1255 COSTS_N_INSNS (3), /* widen. */
1256 COSTS_N_INSNS (3), /* narrow. */
1257 COSTS_N_INSNS (3), /* toint. */
1258 COSTS_N_INSNS (3), /* fromint. */
1259 COSTS_N_INSNS (3) /* roundint. */
1263 COSTS_N_INSNS (30), /* div. */
1264 COSTS_N_INSNS (6), /* mult. */
1265 COSTS_N_INSNS (10), /* mult_addsub. */
1266 COSTS_N_INSNS (7), /* fma. */
1267 COSTS_N_INSNS (3), /* addsub. */
1268 COSTS_N_INSNS (3), /* fpconst. */
1269 COSTS_N_INSNS (3), /* neg. */
1270 COSTS_N_INSNS (3), /* compare. */
1271 COSTS_N_INSNS (3), /* widen. */
1272 COSTS_N_INSNS (3), /* narrow. */
1273 COSTS_N_INSNS (3), /* toint. */
1274 COSTS_N_INSNS (3), /* fromint. */
1275 COSTS_N_INSNS (3) /* roundint. */
1280 COSTS_N_INSNS (1) /* alu. */
/* Per-instruction-class RTX cost table for the Cortex-A7 pipeline.
   NOTE(review): the nested-brace structure and several member entries of
   this initializer appear to have been lost during extraction; the
   surviving entries are kept verbatim — do not compile as-is.  */
1285 const struct cpu_cost_table cortexa7_extra_costs
=
1291 COSTS_N_INSNS (1), /* shift. */
1292 COSTS_N_INSNS (1), /* shift_reg. */
1293 COSTS_N_INSNS (1), /* arith_shift. */
1294 COSTS_N_INSNS (1), /* arith_shift_reg. */
1295 COSTS_N_INSNS (1), /* log_shift. */
1296 COSTS_N_INSNS (1), /* log_shift_reg. */
1297 COSTS_N_INSNS (1), /* extend. */
1298 COSTS_N_INSNS (1), /* extend_arith. */
1299 COSTS_N_INSNS (1), /* bfi. */
1300 COSTS_N_INSNS (1), /* bfx. */
1301 COSTS_N_INSNS (1), /* clz. */
1302 COSTS_N_INSNS (1), /* rev. */
1304 true /* non_exec_costs_exec. */
1311 COSTS_N_INSNS (1), /* flag_setting. */
1312 COSTS_N_INSNS (1), /* extend. */
1313 COSTS_N_INSNS (1), /* add. */
1314 COSTS_N_INSNS (1), /* extend_add. */
1315 COSTS_N_INSNS (7) /* idiv. */
1319 0, /* simple (N/A). */
1320 0, /* flag_setting (N/A). */
1321 COSTS_N_INSNS (1), /* extend. */
1323 COSTS_N_INSNS (2), /* extend_add. */
1329 COSTS_N_INSNS (1), /* load. */
1330 COSTS_N_INSNS (1), /* load_sign_extend. */
1331 COSTS_N_INSNS (3), /* ldrd. */
1332 COSTS_N_INSNS (1), /* ldm_1st. */
1333 1, /* ldm_regs_per_insn_1st. */
1334 2, /* ldm_regs_per_insn_subsequent. */
1335 COSTS_N_INSNS (2), /* loadf. */
1336 COSTS_N_INSNS (2), /* loadd. */
1337 COSTS_N_INSNS (1), /* load_unaligned. */
1338 COSTS_N_INSNS (1), /* store. */
1339 COSTS_N_INSNS (3), /* strd. */
1340 COSTS_N_INSNS (1), /* stm_1st. */
1341 1, /* stm_regs_per_insn_1st. */
1342 2, /* stm_regs_per_insn_subsequent. */
1343 COSTS_N_INSNS (2), /* storef. */
1344 COSTS_N_INSNS (2), /* stored. */
1345 COSTS_N_INSNS (1), /* store_unaligned. */
1346 COSTS_N_INSNS (1), /* loadv. */
1347 COSTS_N_INSNS (1) /* storev. */
1352 COSTS_N_INSNS (15), /* div. */
1353 COSTS_N_INSNS (3), /* mult. */
1354 COSTS_N_INSNS (7), /* mult_addsub. */
1355 COSTS_N_INSNS (7), /* fma. */
1356 COSTS_N_INSNS (3), /* addsub. */
1357 COSTS_N_INSNS (3), /* fpconst. */
1358 COSTS_N_INSNS (3), /* neg. */
1359 COSTS_N_INSNS (3), /* compare. */
1360 COSTS_N_INSNS (3), /* widen. */
1361 COSTS_N_INSNS (3), /* narrow. */
1362 COSTS_N_INSNS (3), /* toint. */
1363 COSTS_N_INSNS (3), /* fromint. */
1364 COSTS_N_INSNS (3) /* roundint. */
1368 COSTS_N_INSNS (30), /* div. */
1369 COSTS_N_INSNS (6), /* mult. */
1370 COSTS_N_INSNS (10), /* mult_addsub. */
1371 COSTS_N_INSNS (7), /* fma. */
1372 COSTS_N_INSNS (3), /* addsub. */
1373 COSTS_N_INSNS (3), /* fpconst. */
1374 COSTS_N_INSNS (3), /* neg. */
1375 COSTS_N_INSNS (3), /* compare. */
1376 COSTS_N_INSNS (3), /* widen. */
1377 COSTS_N_INSNS (3), /* narrow. */
1378 COSTS_N_INSNS (3), /* toint. */
1379 COSTS_N_INSNS (3), /* fromint. */
1380 COSTS_N_INSNS (3) /* roundint. */
1385 COSTS_N_INSNS (1) /* alu. */
/* Per-instruction-class RTX cost table for the Cortex-A12 pipeline.
   NOTE(review): the nested-brace structure and several member entries of
   this initializer appear to have been lost during extraction; the
   surviving entries are kept verbatim — do not compile as-is.  */
1389 const struct cpu_cost_table cortexa12_extra_costs
=
1396 COSTS_N_INSNS (1), /* shift_reg. */
1397 COSTS_N_INSNS (1), /* arith_shift. */
1398 COSTS_N_INSNS (1), /* arith_shift_reg. */
1399 COSTS_N_INSNS (1), /* log_shift. */
1400 COSTS_N_INSNS (1), /* log_shift_reg. */
1402 COSTS_N_INSNS (1), /* extend_arith. */
1404 COSTS_N_INSNS (1), /* bfx. */
1405 COSTS_N_INSNS (1), /* clz. */
1406 COSTS_N_INSNS (1), /* rev. */
1408 true /* non_exec_costs_exec. */
1413 COSTS_N_INSNS (2), /* simple. */
1414 COSTS_N_INSNS (3), /* flag_setting. */
1415 COSTS_N_INSNS (2), /* extend. */
1416 COSTS_N_INSNS (3), /* add. */
1417 COSTS_N_INSNS (2), /* extend_add. */
1418 COSTS_N_INSNS (18) /* idiv. */
1422 0, /* simple (N/A). */
1423 0, /* flag_setting (N/A). */
1424 COSTS_N_INSNS (3), /* extend. */
1426 COSTS_N_INSNS (3), /* extend_add. */
1432 COSTS_N_INSNS (3), /* load. */
1433 COSTS_N_INSNS (3), /* load_sign_extend. */
1434 COSTS_N_INSNS (3), /* ldrd. */
1435 COSTS_N_INSNS (3), /* ldm_1st. */
1436 1, /* ldm_regs_per_insn_1st. */
1437 2, /* ldm_regs_per_insn_subsequent. */
1438 COSTS_N_INSNS (3), /* loadf. */
1439 COSTS_N_INSNS (3), /* loadd. */
1440 0, /* load_unaligned. */
1444 1, /* stm_regs_per_insn_1st. */
1445 2, /* stm_regs_per_insn_subsequent. */
1446 COSTS_N_INSNS (2), /* storef. */
1447 COSTS_N_INSNS (2), /* stored. */
1448 0, /* store_unaligned. */
1449 COSTS_N_INSNS (1), /* loadv. */
1450 COSTS_N_INSNS (1) /* storev. */
1455 COSTS_N_INSNS (17), /* div. */
1456 COSTS_N_INSNS (4), /* mult. */
1457 COSTS_N_INSNS (8), /* mult_addsub. */
1458 COSTS_N_INSNS (8), /* fma. */
1459 COSTS_N_INSNS (4), /* addsub. */
1460 COSTS_N_INSNS (2), /* fpconst. */
1461 COSTS_N_INSNS (2), /* neg. */
1462 COSTS_N_INSNS (2), /* compare. */
1463 COSTS_N_INSNS (4), /* widen. */
1464 COSTS_N_INSNS (4), /* narrow. */
1465 COSTS_N_INSNS (4), /* toint. */
1466 COSTS_N_INSNS (4), /* fromint. */
1467 COSTS_N_INSNS (4) /* roundint. */
1471 COSTS_N_INSNS (31), /* div. */
1472 COSTS_N_INSNS (4), /* mult. */
1473 COSTS_N_INSNS (8), /* mult_addsub. */
1474 COSTS_N_INSNS (8), /* fma. */
1475 COSTS_N_INSNS (4), /* addsub. */
1476 COSTS_N_INSNS (2), /* fpconst. */
1477 COSTS_N_INSNS (2), /* neg. */
1478 COSTS_N_INSNS (2), /* compare. */
1479 COSTS_N_INSNS (4), /* widen. */
1480 COSTS_N_INSNS (4), /* narrow. */
1481 COSTS_N_INSNS (4), /* toint. */
1482 COSTS_N_INSNS (4), /* fromint. */
1483 COSTS_N_INSNS (4) /* roundint. */
1488 COSTS_N_INSNS (1) /* alu. */
/* Per-instruction-class RTX cost table for the Cortex-A15 pipeline.
   NOTE(review): the nested-brace structure and several member entries of
   this initializer appear to have been lost during extraction; the
   surviving entries are kept verbatim — do not compile as-is.  */
1492 const struct cpu_cost_table cortexa15_extra_costs
=
1500 COSTS_N_INSNS (1), /* arith_shift. */
1501 COSTS_N_INSNS (1), /* arith_shift_reg. */
1502 COSTS_N_INSNS (1), /* log_shift. */
1503 COSTS_N_INSNS (1), /* log_shift_reg. */
1505 COSTS_N_INSNS (1), /* extend_arith. */
1506 COSTS_N_INSNS (1), /* bfi. */
1511 true /* non_exec_costs_exec. */
1516 COSTS_N_INSNS (2), /* simple. */
1517 COSTS_N_INSNS (3), /* flag_setting. */
1518 COSTS_N_INSNS (2), /* extend. */
1519 COSTS_N_INSNS (2), /* add. */
1520 COSTS_N_INSNS (2), /* extend_add. */
1521 COSTS_N_INSNS (18) /* idiv. */
1525 0, /* simple (N/A). */
1526 0, /* flag_setting (N/A). */
1527 COSTS_N_INSNS (3), /* extend. */
1529 COSTS_N_INSNS (3), /* extend_add. */
1535 COSTS_N_INSNS (3), /* load. */
1536 COSTS_N_INSNS (3), /* load_sign_extend. */
1537 COSTS_N_INSNS (3), /* ldrd. */
1538 COSTS_N_INSNS (4), /* ldm_1st. */
1539 1, /* ldm_regs_per_insn_1st. */
1540 2, /* ldm_regs_per_insn_subsequent. */
1541 COSTS_N_INSNS (4), /* loadf. */
1542 COSTS_N_INSNS (4), /* loadd. */
1543 0, /* load_unaligned. */
1546 COSTS_N_INSNS (1), /* stm_1st. */
1547 1, /* stm_regs_per_insn_1st. */
1548 2, /* stm_regs_per_insn_subsequent. */
1551 0, /* store_unaligned. */
1552 COSTS_N_INSNS (1), /* loadv. */
1553 COSTS_N_INSNS (1) /* storev. */
1558 COSTS_N_INSNS (17), /* div. */
1559 COSTS_N_INSNS (4), /* mult. */
1560 COSTS_N_INSNS (8), /* mult_addsub. */
1561 COSTS_N_INSNS (8), /* fma. */
1562 COSTS_N_INSNS (4), /* addsub. */
1563 COSTS_N_INSNS (2), /* fpconst. */
1564 COSTS_N_INSNS (2), /* neg. */
1565 COSTS_N_INSNS (5), /* compare. */
1566 COSTS_N_INSNS (4), /* widen. */
1567 COSTS_N_INSNS (4), /* narrow. */
1568 COSTS_N_INSNS (4), /* toint. */
1569 COSTS_N_INSNS (4), /* fromint. */
1570 COSTS_N_INSNS (4) /* roundint. */
1574 COSTS_N_INSNS (31), /* div. */
1575 COSTS_N_INSNS (4), /* mult. */
1576 COSTS_N_INSNS (8), /* mult_addsub. */
1577 COSTS_N_INSNS (8), /* fma. */
1578 COSTS_N_INSNS (4), /* addsub. */
1579 COSTS_N_INSNS (2), /* fpconst. */
1580 COSTS_N_INSNS (2), /* neg. */
1581 COSTS_N_INSNS (2), /* compare. */
1582 COSTS_N_INSNS (4), /* widen. */
1583 COSTS_N_INSNS (4), /* narrow. */
1584 COSTS_N_INSNS (4), /* toint. */
1585 COSTS_N_INSNS (4), /* fromint. */
1586 COSTS_N_INSNS (4) /* roundint. */
1591 COSTS_N_INSNS (1) /* alu. */
/* Per-instruction-class RTX cost table for ARMv7-M class cores.
   NOTE(review): the nested-brace structure and several member entries of
   this initializer appear to have been lost during extraction; the
   surviving entries are kept verbatim — do not compile as-is.  */
1595 const struct cpu_cost_table v7m_extra_costs
=
1603 0, /* arith_shift. */
1604 COSTS_N_INSNS (1), /* arith_shift_reg. */
1606 COSTS_N_INSNS (1), /* log_shift_reg. */
1608 COSTS_N_INSNS (1), /* extend_arith. */
1613 COSTS_N_INSNS (1), /* non_exec. */
1614 false /* non_exec_costs_exec. */
1619 COSTS_N_INSNS (1), /* simple. */
1620 COSTS_N_INSNS (1), /* flag_setting. */
1621 COSTS_N_INSNS (2), /* extend. */
1622 COSTS_N_INSNS (1), /* add. */
1623 COSTS_N_INSNS (3), /* extend_add. */
1624 COSTS_N_INSNS (8) /* idiv. */
1628 0, /* simple (N/A). */
1629 0, /* flag_setting (N/A). */
1630 COSTS_N_INSNS (2), /* extend. */
1632 COSTS_N_INSNS (3), /* extend_add. */
1638 COSTS_N_INSNS (2), /* load. */
1639 0, /* load_sign_extend. */
1640 COSTS_N_INSNS (3), /* ldrd. */
1641 COSTS_N_INSNS (2), /* ldm_1st. */
1642 1, /* ldm_regs_per_insn_1st. */
1643 1, /* ldm_regs_per_insn_subsequent. */
1644 COSTS_N_INSNS (2), /* loadf. */
1645 COSTS_N_INSNS (3), /* loadd. */
1646 COSTS_N_INSNS (1), /* load_unaligned. */
1647 COSTS_N_INSNS (2), /* store. */
1648 COSTS_N_INSNS (3), /* strd. */
1649 COSTS_N_INSNS (2), /* stm_1st. */
1650 1, /* stm_regs_per_insn_1st. */
1651 1, /* stm_regs_per_insn_subsequent. */
1652 COSTS_N_INSNS (2), /* storef. */
1653 COSTS_N_INSNS (3), /* stored. */
1654 COSTS_N_INSNS (1), /* store_unaligned. */
1655 COSTS_N_INSNS (1), /* loadv. */
1656 COSTS_N_INSNS (1) /* storev. */
1661 COSTS_N_INSNS (7), /* div. */
1662 COSTS_N_INSNS (2), /* mult. */
1663 COSTS_N_INSNS (5), /* mult_addsub. */
1664 COSTS_N_INSNS (3), /* fma. */
1665 COSTS_N_INSNS (1), /* addsub. */
1677 COSTS_N_INSNS (15), /* div. */
1678 COSTS_N_INSNS (5), /* mult. */
1679 COSTS_N_INSNS (7), /* mult_addsub. */
1680 COSTS_N_INSNS (7), /* fma. */
1681 COSTS_N_INSNS (3), /* addsub. */
1694 COSTS_N_INSNS (1) /* alu. */
1698 const struct tune_params arm_slowmul_tune
=
1700 &generic_extra_costs
, /* Insn extra costs. */
1701 NULL
, /* Sched adj cost. */
1702 arm_default_branch_cost
,
1703 &arm_default_vec_cost
,
1704 3, /* Constant limit. */
1705 5, /* Max cond insns. */
1706 8, /* Memset max inline. */
1707 1, /* Issue rate. */
1708 ARM_PREFETCH_NOT_BENEFICIAL
,
1709 tune_params::PREF_CONST_POOL_TRUE
,
1710 tune_params::PREF_LDRD_FALSE
,
1711 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1712 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1713 tune_params::DISPARAGE_FLAGS_NEITHER
,
1714 tune_params::PREF_NEON_64_FALSE
,
1715 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1716 tune_params::FUSE_NOTHING
,
1717 tune_params::SCHED_AUTOPREF_OFF
1720 const struct tune_params arm_fastmul_tune
=
1722 &generic_extra_costs
, /* Insn extra costs. */
1723 NULL
, /* Sched adj cost. */
1724 arm_default_branch_cost
,
1725 &arm_default_vec_cost
,
1726 1, /* Constant limit. */
1727 5, /* Max cond insns. */
1728 8, /* Memset max inline. */
1729 1, /* Issue rate. */
1730 ARM_PREFETCH_NOT_BENEFICIAL
,
1731 tune_params::PREF_CONST_POOL_TRUE
,
1732 tune_params::PREF_LDRD_FALSE
,
1733 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1734 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1735 tune_params::DISPARAGE_FLAGS_NEITHER
,
1736 tune_params::PREF_NEON_64_FALSE
,
1737 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1738 tune_params::FUSE_NOTHING
,
1739 tune_params::SCHED_AUTOPREF_OFF
1742 /* StrongARM has early execution of branches, so a sequence that is worth
1743 skipping is shorter. Set max_insns_skipped to a lower value. */
1745 const struct tune_params arm_strongarm_tune
=
1747 &generic_extra_costs
, /* Insn extra costs. */
1748 NULL
, /* Sched adj cost. */
1749 arm_default_branch_cost
,
1750 &arm_default_vec_cost
,
1751 1, /* Constant limit. */
1752 3, /* Max cond insns. */
1753 8, /* Memset max inline. */
1754 1, /* Issue rate. */
1755 ARM_PREFETCH_NOT_BENEFICIAL
,
1756 tune_params::PREF_CONST_POOL_TRUE
,
1757 tune_params::PREF_LDRD_FALSE
,
1758 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1759 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1760 tune_params::DISPARAGE_FLAGS_NEITHER
,
1761 tune_params::PREF_NEON_64_FALSE
,
1762 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1763 tune_params::FUSE_NOTHING
,
1764 tune_params::SCHED_AUTOPREF_OFF
1767 const struct tune_params arm_xscale_tune
=
1769 &generic_extra_costs
, /* Insn extra costs. */
1770 xscale_sched_adjust_cost
,
1771 arm_default_branch_cost
,
1772 &arm_default_vec_cost
,
1773 2, /* Constant limit. */
1774 3, /* Max cond insns. */
1775 8, /* Memset max inline. */
1776 1, /* Issue rate. */
1777 ARM_PREFETCH_NOT_BENEFICIAL
,
1778 tune_params::PREF_CONST_POOL_TRUE
,
1779 tune_params::PREF_LDRD_FALSE
,
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1781 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1782 tune_params::DISPARAGE_FLAGS_NEITHER
,
1783 tune_params::PREF_NEON_64_FALSE
,
1784 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1785 tune_params::FUSE_NOTHING
,
1786 tune_params::SCHED_AUTOPREF_OFF
1789 const struct tune_params arm_9e_tune
=
1791 &generic_extra_costs
, /* Insn extra costs. */
1792 NULL
, /* Sched adj cost. */
1793 arm_default_branch_cost
,
1794 &arm_default_vec_cost
,
1795 1, /* Constant limit. */
1796 5, /* Max cond insns. */
1797 8, /* Memset max inline. */
1798 1, /* Issue rate. */
1799 ARM_PREFETCH_NOT_BENEFICIAL
,
1800 tune_params::PREF_CONST_POOL_TRUE
,
1801 tune_params::PREF_LDRD_FALSE
,
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1803 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1804 tune_params::DISPARAGE_FLAGS_NEITHER
,
1805 tune_params::PREF_NEON_64_FALSE
,
1806 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1807 tune_params::FUSE_NOTHING
,
1808 tune_params::SCHED_AUTOPREF_OFF
1811 const struct tune_params arm_marvell_pj4_tune
=
1813 &generic_extra_costs
, /* Insn extra costs. */
1814 NULL
, /* Sched adj cost. */
1815 arm_default_branch_cost
,
1816 &arm_default_vec_cost
,
1817 1, /* Constant limit. */
1818 5, /* Max cond insns. */
1819 8, /* Memset max inline. */
1820 2, /* Issue rate. */
1821 ARM_PREFETCH_NOT_BENEFICIAL
,
1822 tune_params::PREF_CONST_POOL_TRUE
,
1823 tune_params::PREF_LDRD_FALSE
,
1824 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1825 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1826 tune_params::DISPARAGE_FLAGS_NEITHER
,
1827 tune_params::PREF_NEON_64_FALSE
,
1828 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1829 tune_params::FUSE_NOTHING
,
1830 tune_params::SCHED_AUTOPREF_OFF
1833 const struct tune_params arm_v6t2_tune
=
1835 &generic_extra_costs
, /* Insn extra costs. */
1836 NULL
, /* Sched adj cost. */
1837 arm_default_branch_cost
,
1838 &arm_default_vec_cost
,
1839 1, /* Constant limit. */
1840 5, /* Max cond insns. */
1841 8, /* Memset max inline. */
1842 1, /* Issue rate. */
1843 ARM_PREFETCH_NOT_BENEFICIAL
,
1844 tune_params::PREF_CONST_POOL_FALSE
,
1845 tune_params::PREF_LDRD_FALSE
,
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1847 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1848 tune_params::DISPARAGE_FLAGS_NEITHER
,
1849 tune_params::PREF_NEON_64_FALSE
,
1850 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1851 tune_params::FUSE_NOTHING
,
1852 tune_params::SCHED_AUTOPREF_OFF
1856 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1857 const struct tune_params arm_cortex_tune
=
1859 &generic_extra_costs
,
1860 NULL
, /* Sched adj cost. */
1861 arm_default_branch_cost
,
1862 &arm_default_vec_cost
,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 2, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL
,
1868 tune_params::PREF_CONST_POOL_FALSE
,
1869 tune_params::PREF_LDRD_FALSE
,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER
,
1873 tune_params::PREF_NEON_64_FALSE
,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1875 tune_params::FUSE_NOTHING
,
1876 tune_params::SCHED_AUTOPREF_OFF
1879 const struct tune_params arm_cortex_a8_tune
=
1881 &cortexa8_extra_costs
,
1882 NULL
, /* Sched adj cost. */
1883 arm_default_branch_cost
,
1884 &arm_default_vec_cost
,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL
,
1890 tune_params::PREF_CONST_POOL_FALSE
,
1891 tune_params::PREF_LDRD_FALSE
,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER
,
1895 tune_params::PREF_NEON_64_FALSE
,
1896 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1897 tune_params::FUSE_NOTHING
,
1898 tune_params::SCHED_AUTOPREF_OFF
1901 const struct tune_params arm_cortex_a7_tune
=
1903 &cortexa7_extra_costs
,
1904 NULL
, /* Sched adj cost. */
1905 arm_default_branch_cost
,
1906 &arm_default_vec_cost
,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 2, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL
,
1912 tune_params::PREF_CONST_POOL_FALSE
,
1913 tune_params::PREF_LDRD_FALSE
,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER
,
1917 tune_params::PREF_NEON_64_FALSE
,
1918 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1919 tune_params::FUSE_NOTHING
,
1920 tune_params::SCHED_AUTOPREF_OFF
1923 const struct tune_params arm_cortex_a15_tune
=
1925 &cortexa15_extra_costs
,
1926 NULL
, /* Sched adj cost. */
1927 arm_default_branch_cost
,
1928 &arm_default_vec_cost
,
1929 1, /* Constant limit. */
1930 2, /* Max cond insns. */
1931 8, /* Memset max inline. */
1932 3, /* Issue rate. */
1933 ARM_PREFETCH_NOT_BENEFICIAL
,
1934 tune_params::PREF_CONST_POOL_FALSE
,
1935 tune_params::PREF_LDRD_TRUE
,
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1937 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1938 tune_params::DISPARAGE_FLAGS_ALL
,
1939 tune_params::PREF_NEON_64_FALSE
,
1940 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1941 tune_params::FUSE_NOTHING
,
1942 tune_params::SCHED_AUTOPREF_FULL
1945 const struct tune_params arm_cortex_a35_tune
=
1947 &cortexa53_extra_costs
,
1948 NULL
, /* Sched adj cost. */
1949 arm_default_branch_cost
,
1950 &arm_default_vec_cost
,
1951 1, /* Constant limit. */
1952 5, /* Max cond insns. */
1953 8, /* Memset max inline. */
1954 1, /* Issue rate. */
1955 ARM_PREFETCH_NOT_BENEFICIAL
,
1956 tune_params::PREF_CONST_POOL_FALSE
,
1957 tune_params::PREF_LDRD_FALSE
,
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1960 tune_params::DISPARAGE_FLAGS_NEITHER
,
1961 tune_params::PREF_NEON_64_FALSE
,
1962 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1963 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
1964 tune_params::SCHED_AUTOPREF_OFF
1967 const struct tune_params arm_cortex_a53_tune
=
1969 &cortexa53_extra_costs
,
1970 NULL
, /* Sched adj cost. */
1971 arm_default_branch_cost
,
1972 &arm_default_vec_cost
,
1973 1, /* Constant limit. */
1974 5, /* Max cond insns. */
1975 8, /* Memset max inline. */
1976 2, /* Issue rate. */
1977 ARM_PREFETCH_NOT_BENEFICIAL
,
1978 tune_params::PREF_CONST_POOL_FALSE
,
1979 tune_params::PREF_LDRD_FALSE
,
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1982 tune_params::DISPARAGE_FLAGS_NEITHER
,
1983 tune_params::PREF_NEON_64_FALSE
,
1984 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1985 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
1986 tune_params::SCHED_AUTOPREF_OFF
1989 const struct tune_params arm_cortex_a57_tune
=
1991 &cortexa57_extra_costs
,
1992 NULL
, /* Sched adj cost. */
1993 arm_default_branch_cost
,
1994 &arm_default_vec_cost
,
1995 1, /* Constant limit. */
1996 2, /* Max cond insns. */
1997 8, /* Memset max inline. */
1998 3, /* Issue rate. */
1999 ARM_PREFETCH_NOT_BENEFICIAL
,
2000 tune_params::PREF_CONST_POOL_FALSE
,
2001 tune_params::PREF_LDRD_TRUE
,
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2004 tune_params::DISPARAGE_FLAGS_ALL
,
2005 tune_params::PREF_NEON_64_FALSE
,
2006 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2007 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2008 tune_params::SCHED_AUTOPREF_FULL
2011 const struct tune_params arm_exynosm1_tune
=
2013 &exynosm1_extra_costs
,
2014 NULL
, /* Sched adj cost. */
2015 arm_default_branch_cost
,
2016 &arm_default_vec_cost
,
2017 1, /* Constant limit. */
2018 2, /* Max cond insns. */
2019 8, /* Memset max inline. */
2020 3, /* Issue rate. */
2021 ARM_PREFETCH_NOT_BENEFICIAL
,
2022 tune_params::PREF_CONST_POOL_FALSE
,
2023 tune_params::PREF_LDRD_TRUE
,
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2025 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2026 tune_params::DISPARAGE_FLAGS_ALL
,
2027 tune_params::PREF_NEON_64_FALSE
,
2028 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2029 tune_params::FUSE_NOTHING
,
2030 tune_params::SCHED_AUTOPREF_OFF
2033 const struct tune_params arm_xgene1_tune
=
2035 &xgene1_extra_costs
,
2036 NULL
, /* Sched adj cost. */
2037 arm_default_branch_cost
,
2038 &arm_default_vec_cost
,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 32, /* Memset max inline. */
2042 4, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL
,
2044 tune_params::PREF_CONST_POOL_FALSE
,
2045 tune_params::PREF_LDRD_TRUE
,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL
,
2049 tune_params::PREF_NEON_64_FALSE
,
2050 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2051 tune_params::FUSE_NOTHING
,
2052 tune_params::SCHED_AUTOPREF_OFF
2055 const struct tune_params arm_qdf24xx_tune
=
2057 &qdf24xx_extra_costs
,
2058 NULL
, /* Scheduler cost adjustment. */
2059 arm_default_branch_cost
,
2060 &arm_default_vec_cost
, /* Vectorizer costs. */
2061 1, /* Constant limit. */
2062 2, /* Max cond insns. */
2063 8, /* Memset max inline. */
2064 4, /* Issue rate. */
2065 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2066 tune_params::PREF_CONST_POOL_FALSE
,
2067 tune_params::PREF_LDRD_TRUE
,
2068 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2070 tune_params::DISPARAGE_FLAGS_ALL
,
2071 tune_params::PREF_NEON_64_FALSE
,
2072 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2073 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2074 tune_params::SCHED_AUTOPREF_FULL
2077 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2078 less appealing. Set max_insns_skipped to a low value. */
2080 const struct tune_params arm_cortex_a5_tune
=
2082 &cortexa5_extra_costs
,
2083 NULL
, /* Sched adj cost. */
2084 arm_cortex_a5_branch_cost
,
2085 &arm_default_vec_cost
,
2086 1, /* Constant limit. */
2087 1, /* Max cond insns. */
2088 8, /* Memset max inline. */
2089 2, /* Issue rate. */
2090 ARM_PREFETCH_NOT_BENEFICIAL
,
2091 tune_params::PREF_CONST_POOL_FALSE
,
2092 tune_params::PREF_LDRD_FALSE
,
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2094 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2095 tune_params::DISPARAGE_FLAGS_NEITHER
,
2096 tune_params::PREF_NEON_64_FALSE
,
2097 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2098 tune_params::FUSE_NOTHING
,
2099 tune_params::SCHED_AUTOPREF_OFF
2102 const struct tune_params arm_cortex_a9_tune
=
2104 &cortexa9_extra_costs
,
2105 cortex_a9_sched_adjust_cost
,
2106 arm_default_branch_cost
,
2107 &arm_default_vec_cost
,
2108 1, /* Constant limit. */
2109 5, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 2, /* Issue rate. */
2112 ARM_PREFETCH_BENEFICIAL(4,32,32),
2113 tune_params::PREF_CONST_POOL_FALSE
,
2114 tune_params::PREF_LDRD_FALSE
,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_NEITHER
,
2118 tune_params::PREF_NEON_64_FALSE
,
2119 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2120 tune_params::FUSE_NOTHING
,
2121 tune_params::SCHED_AUTOPREF_OFF
2124 const struct tune_params arm_cortex_a12_tune
=
2126 &cortexa12_extra_costs
,
2127 NULL
, /* Sched adj cost. */
2128 arm_default_branch_cost
,
2129 &arm_default_vec_cost
, /* Vectorizer costs. */
2130 1, /* Constant limit. */
2131 2, /* Max cond insns. */
2132 8, /* Memset max inline. */
2133 2, /* Issue rate. */
2134 ARM_PREFETCH_NOT_BENEFICIAL
,
2135 tune_params::PREF_CONST_POOL_FALSE
,
2136 tune_params::PREF_LDRD_TRUE
,
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2138 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2139 tune_params::DISPARAGE_FLAGS_ALL
,
2140 tune_params::PREF_NEON_64_FALSE
,
2141 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2142 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2143 tune_params::SCHED_AUTOPREF_OFF
2146 const struct tune_params arm_cortex_a73_tune
=
2148 &cortexa57_extra_costs
,
2149 NULL
, /* Sched adj cost. */
2150 arm_default_branch_cost
,
2151 &arm_default_vec_cost
, /* Vectorizer costs. */
2152 1, /* Constant limit. */
2153 2, /* Max cond insns. */
2154 8, /* Memset max inline. */
2155 2, /* Issue rate. */
2156 ARM_PREFETCH_NOT_BENEFICIAL
,
2157 tune_params::PREF_CONST_POOL_FALSE
,
2158 tune_params::PREF_LDRD_TRUE
,
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2160 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2161 tune_params::DISPARAGE_FLAGS_ALL
,
2162 tune_params::PREF_NEON_64_FALSE
,
2163 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2164 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2165 tune_params::SCHED_AUTOPREF_FULL
2168 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2169 cycle to execute each. An LDR from the constant pool also takes two cycles
2170 to execute, but mildly increases pipelining opportunity (consecutive
2171 loads/stores can be pipelined together, saving one cycle), and may also
2172 improve icache utilisation. Hence we prefer the constant pool for such
2175 const struct tune_params arm_v7m_tune
=
2178 NULL
, /* Sched adj cost. */
2179 arm_cortex_m_branch_cost
,
2180 &arm_default_vec_cost
,
2181 1, /* Constant limit. */
2182 2, /* Max cond insns. */
2183 8, /* Memset max inline. */
2184 1, /* Issue rate. */
2185 ARM_PREFETCH_NOT_BENEFICIAL
,
2186 tune_params::PREF_CONST_POOL_TRUE
,
2187 tune_params::PREF_LDRD_FALSE
,
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2189 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2190 tune_params::DISPARAGE_FLAGS_NEITHER
,
2191 tune_params::PREF_NEON_64_FALSE
,
2192 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2193 tune_params::FUSE_NOTHING
,
2194 tune_params::SCHED_AUTOPREF_OFF
2197 /* Cortex-M7 tuning. */
2199 const struct tune_params arm_cortex_m7_tune
=
2202 NULL
, /* Sched adj cost. */
2203 arm_cortex_m7_branch_cost
,
2204 &arm_default_vec_cost
,
2205 0, /* Constant limit. */
2206 1, /* Max cond insns. */
2207 8, /* Memset max inline. */
2208 2, /* Issue rate. */
2209 ARM_PREFETCH_NOT_BENEFICIAL
,
2210 tune_params::PREF_CONST_POOL_TRUE
,
2211 tune_params::PREF_LDRD_FALSE
,
2212 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2213 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2214 tune_params::DISPARAGE_FLAGS_NEITHER
,
2215 tune_params::PREF_NEON_64_FALSE
,
2216 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2217 tune_params::FUSE_NOTHING
,
2218 tune_params::SCHED_AUTOPREF_OFF
2221 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2222 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2224 const struct tune_params arm_v6m_tune
=
2226 &generic_extra_costs
, /* Insn extra costs. */
2227 NULL
, /* Sched adj cost. */
2228 arm_default_branch_cost
,
2229 &arm_default_vec_cost
, /* Vectorizer costs. */
2230 1, /* Constant limit. */
2231 5, /* Max cond insns. */
2232 8, /* Memset max inline. */
2233 1, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL
,
2235 tune_params::PREF_CONST_POOL_FALSE
,
2236 tune_params::PREF_LDRD_FALSE
,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_NEITHER
,
2240 tune_params::PREF_NEON_64_FALSE
,
2241 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2242 tune_params::FUSE_NOTHING
,
2243 tune_params::SCHED_AUTOPREF_OFF
2246 const struct tune_params arm_fa726te_tune
=
2248 &generic_extra_costs
, /* Insn extra costs. */
2249 fa726te_sched_adjust_cost
,
2250 arm_default_branch_cost
,
2251 &arm_default_vec_cost
,
2252 1, /* Constant limit. */
2253 5, /* Max cond insns. */
2254 8, /* Memset max inline. */
2255 2, /* Issue rate. */
2256 ARM_PREFETCH_NOT_BENEFICIAL
,
2257 tune_params::PREF_CONST_POOL_TRUE
,
2258 tune_params::PREF_LDRD_FALSE
,
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2260 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2261 tune_params::DISPARAGE_FLAGS_NEITHER
,
2262 tune_params::PREF_NEON_64_FALSE
,
2263 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2264 tune_params::FUSE_NOTHING
,
2265 tune_params::SCHED_AUTOPREF_OFF
2269 /* Not all of these give usefully different compilation alternatives,
2270 but there is no simple way of generalizing them. */
2271 static const struct processors all_cores
[] =
2274 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2275 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2276 FLAGS, &arm_##COSTS##_tune},
2277 #include "arm-cores.def"
2279 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, ARM_FSET_EMPTY
, NULL
}
2282 static const struct processors all_architectures
[] =
2284 /* ARM Architectures */
2285 /* We don't specify tuning costs here as it will be figured out
2288 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2289 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2290 #include "arm-arches.def"
2292 {NULL
, arm_none
, NULL
, BASE_ARCH_0
, ARM_FSET_EMPTY
, NULL
}
/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;	/* -march.  */
static const struct processors *arm_selected_cpu;	/* -mcpu.  */
static const struct processors *arm_selected_tune;	/* -mtune.  */
/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2308 /* Available values for -mfpu=. */
2310 const struct arm_fpu_desc all_fpus
[] =
2312 #define ARM_FPU(NAME, REV, VFP_REGS, FEATURES) \
2313 { NAME, REV, VFP_REGS, FEATURES },
2314 #include "arm-fpus.def"
2318 /* Supported TLS relocations. */
2326 TLS_DESCSEQ
/* GNU scheme */
2329 /* The maximum number of insns to be used when loading a constant. */
2331 arm_constant_limit (bool size_p
)
2333 return size_p
? 1 : current_tune
->constant_limit
;
2336 /* Emit an insn that's a simple single-set. Both the operands must be known
2338 inline static rtx_insn
*
2339 emit_set_insn (rtx x
, rtx y
)
2341 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the population count (number of set bits) of VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned result = 0;

  /* Kernighan's method: each pass clears the least-significant set bit,
     so the loop iterates exactly once per set bit.  */
  for (; value != 0; value &= value - 1)
    result++;

  return result;
}
2359 /* Return the number of features in feature-set SET. */
2361 feature_count (const arm_feature_set
* set
)
2363 return (bit_count (ARM_FSET_CPU1 (*set
))
2364 + bit_count (ARM_FSET_CPU2 (*set
)));
2371 } arm_fixed_mode_set
;
2373 /* A small helper for setting fixed-point library libfuncs. */
2376 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2377 const char *funcname
, const char *modename
,
2382 if (num_suffix
== 0)
2383 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2385 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2387 set_optab_libfunc (optable
, mode
, buffer
);
2391 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2392 machine_mode from
, const char *funcname
,
2393 const char *toname
, const char *fromname
)
2396 const char *maybe_suffix_2
= "";
2398 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2399 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2400 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2401 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2402 maybe_suffix_2
= "2";
2404 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2407 set_conv_libfunc (optable
, to
, from
, buffer
);
2410 /* Set up library functions unique to ARM. */
2413 arm_init_libfuncs (void)
2415 /* For Linux, we have access to kernel support for atomic operations. */
2416 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2417 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2419 /* There are no special library functions unless we are using the
2424 /* The functions below are described in Section 4 of the "Run-Time
2425 ABI for the ARM architecture", Version 1.0. */
2427 /* Double-precision floating-point arithmetic. Table 2. */
2428 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2429 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2430 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2431 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2432 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2434 /* Double-precision comparisons. Table 3. */
2435 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2436 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2437 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2438 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2439 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2440 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2441 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2443 /* Single-precision floating-point arithmetic. Table 4. */
2444 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2445 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2446 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2447 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2448 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2450 /* Single-precision comparisons. Table 5. */
2451 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2452 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2453 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2454 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2455 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2456 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2457 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2459 /* Floating-point to integer conversions. Table 6. */
2460 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2461 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2462 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2463 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2464 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2465 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2466 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2467 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2469 /* Conversions between floating types. Table 7. */
2470 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2471 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2473 /* Integer to floating-point conversions. Table 8. */
2474 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2475 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2476 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2477 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2478 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2479 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2480 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2481 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2483 /* Long long. Table 9. */
2484 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2485 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2486 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2487 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2488 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2489 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2490 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2491 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2493 /* Integer (32/32->32) division. \S 4.3.1. */
2494 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2495 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2497 /* The divmod functions are designed so that they can be used for
2498 plain division, even though they return both the quotient and the
2499 remainder. The quotient is returned in the usual location (i.e.,
2500 r0 for SImode, {r0, r1} for DImode), just as would be expected
2501 for an ordinary division routine. Because the AAPCS calling
2502 conventions specify that all of { r0, r1, r2, r3 } are
2503 callee-saved registers, there is no need to tell the compiler
2504 explicitly that those registers are clobbered by these
2506 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2507 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2509 /* For SImode division the ABI provides div-without-mod routines,
2510 which are faster. */
2511 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2512 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2514 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2515 divmod libcalls instead. */
2516 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2517 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2518 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2519 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2521 /* Half-precision float operations. The compiler handles all operations
2522 with NULL libfuncs by converting the SFmode. */
2523 switch (arm_fp16_format
)
2525 case ARM_FP16_FORMAT_IEEE
:
2526 case ARM_FP16_FORMAT_ALTERNATIVE
:
2529 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2530 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2532 : "__gnu_f2h_alternative"));
2533 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2534 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2536 : "__gnu_h2f_alternative"));
2539 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2540 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2541 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2542 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2543 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2546 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2547 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2548 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2549 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2550 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2551 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2552 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2559 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2561 const arm_fixed_mode_set fixed_arith_modes
[] =
2582 const arm_fixed_mode_set fixed_conv_modes
[] =
2612 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2614 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2615 "add", fixed_arith_modes
[i
].name
, 3);
2616 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2617 "ssadd", fixed_arith_modes
[i
].name
, 3);
2618 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2619 "usadd", fixed_arith_modes
[i
].name
, 3);
2620 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2621 "sub", fixed_arith_modes
[i
].name
, 3);
2622 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2623 "sssub", fixed_arith_modes
[i
].name
, 3);
2624 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2625 "ussub", fixed_arith_modes
[i
].name
, 3);
2626 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2627 "mul", fixed_arith_modes
[i
].name
, 3);
2628 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2629 "ssmul", fixed_arith_modes
[i
].name
, 3);
2630 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2631 "usmul", fixed_arith_modes
[i
].name
, 3);
2632 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2633 "div", fixed_arith_modes
[i
].name
, 3);
2634 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2635 "udiv", fixed_arith_modes
[i
].name
, 3);
2636 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2637 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2638 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2639 "usdiv", fixed_arith_modes
[i
].name
, 3);
2640 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2641 "neg", fixed_arith_modes
[i
].name
, 2);
2642 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2643 "ssneg", fixed_arith_modes
[i
].name
, 2);
2644 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2645 "usneg", fixed_arith_modes
[i
].name
, 2);
2646 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2647 "ashl", fixed_arith_modes
[i
].name
, 3);
2648 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2649 "ashr", fixed_arith_modes
[i
].name
, 3);
2650 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2651 "lshr", fixed_arith_modes
[i
].name
, 3);
2652 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2653 "ssashl", fixed_arith_modes
[i
].name
, 3);
2654 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2655 "usashl", fixed_arith_modes
[i
].name
, 3);
2656 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2657 "cmp", fixed_arith_modes
[i
].name
, 2);
2660 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2661 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2664 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2665 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2668 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2669 fixed_conv_modes
[j
].mode
, "fract",
2670 fixed_conv_modes
[i
].name
,
2671 fixed_conv_modes
[j
].name
);
2672 arm_set_fixed_conv_libfunc (satfract_optab
,
2673 fixed_conv_modes
[i
].mode
,
2674 fixed_conv_modes
[j
].mode
, "satfract",
2675 fixed_conv_modes
[i
].name
,
2676 fixed_conv_modes
[j
].name
);
2677 arm_set_fixed_conv_libfunc (fractuns_optab
,
2678 fixed_conv_modes
[i
].mode
,
2679 fixed_conv_modes
[j
].mode
, "fractuns",
2680 fixed_conv_modes
[i
].name
,
2681 fixed_conv_modes
[j
].name
);
2682 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2683 fixed_conv_modes
[i
].mode
,
2684 fixed_conv_modes
[j
].mode
, "satfractuns",
2685 fixed_conv_modes
[i
].name
,
2686 fixed_conv_modes
[j
].name
);
2690 if (TARGET_AAPCS_BASED
)
2691 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2694 /* On AAPCS systems, this is the "struct __va_list". */
2695 static GTY(()) tree va_list_type
;
2697 /* Return the type to use as __builtin_va_list. */
2699 arm_build_builtin_va_list (void)
2704 if (!TARGET_AAPCS_BASED
)
2705 return std_build_builtin_va_list ();
2707 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2715 The C Library ABI further reinforces this definition in \S
2718 We must follow this definition exactly. The structure tag
2719 name is visible in C++ mangled names, and thus forms a part
2720 of the ABI. The field name may be used by people who
2721 #include <stdarg.h>. */
2722 /* Create the type. */
2723 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2724 /* Give it the required name. */
2725 va_list_name
= build_decl (BUILTINS_LOCATION
,
2727 get_identifier ("__va_list"),
2729 DECL_ARTIFICIAL (va_list_name
) = 1;
2730 TYPE_NAME (va_list_type
) = va_list_name
;
2731 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2732 /* Create the __ap field. */
2733 ap_field
= build_decl (BUILTINS_LOCATION
,
2735 get_identifier ("__ap"),
2737 DECL_ARTIFICIAL (ap_field
) = 1;
2738 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2739 TYPE_FIELDS (va_list_type
) = ap_field
;
2740 /* Compute its layout. */
2741 layout_type (va_list_type
);
2743 return va_list_type
;
2746 /* Return an expression of type "void *" pointing to the next
2747 available argument in a variable-argument list. VALIST is the
2748 user-level va_list object, of type __builtin_va_list. */
2750 arm_extract_valist_ptr (tree valist
)
2752 if (TREE_TYPE (valist
) == error_mark_node
)
2753 return error_mark_node
;
2755 /* On an AAPCS target, the pointer is stored within "struct
2757 if (TARGET_AAPCS_BASED
)
2759 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2760 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2761 valist
, ap_field
, NULL_TREE
);
2767 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2769 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2771 valist
= arm_extract_valist_ptr (valist
);
2772 std_expand_builtin_va_start (valist
, nextarg
);
2775 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2777 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2780 valist
= arm_extract_valist_ptr (valist
);
2781 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2784 /* Check any incompatible options that the user has specified. */
2786 arm_option_check_internal (struct gcc_options
*opts
)
2788 int flags
= opts
->x_target_flags
;
2789 const struct arm_fpu_desc
*fpu_desc
= &all_fpus
[opts
->x_arm_fpu_index
];
2791 /* iWMMXt and NEON are incompatible. */
2793 && ARM_FPU_FSET_HAS (fpu_desc
->features
, FPU_FL_NEON
))
2794 error ("iWMMXt and NEON are incompatible");
2796 /* Make sure that the processor choice does not conflict with any of the
2797 other command line choices. */
2798 if (TARGET_ARM_P (flags
) && !ARM_FSET_HAS_CPU1 (insn_flags
, FL_NOTM
))
2799 error ("target CPU does not support ARM mode");
2801 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2802 from here where no function is being compiled currently. */
2803 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2804 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2806 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2807 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2809 /* If this target is normally configured to use APCS frames, warn if they
2810 are turned off and debugging is turned on. */
2811 if (TARGET_ARM_P (flags
)
2812 && write_symbols
!= NO_DEBUG
2813 && !TARGET_APCS_FRAME
2814 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2815 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2817 /* iWMMXt unsupported under Thumb mode. */
2818 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2819 error ("iWMMXt unsupported under Thumb mode");
2821 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2822 error ("can not use -mtp=cp15 with 16-bit Thumb");
2824 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2826 error ("RTP PIC is incompatible with Thumb");
2830 /* We only support -mslow-flash-data on armv7-m targets. */
2831 if (target_slow_flash_data
2832 && ((!(arm_arch7
&& !arm_arch_notm
) && !arm_arch7em
)
2833 || (TARGET_THUMB1_P (flags
) || flag_pic
|| TARGET_NEON
)))
2834 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2836 /* We only support pure-code on Thumb-2 M-profile targets. */
2837 if (target_pure_code
2838 && (!arm_arch_thumb2
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
))
2839 error ("-mpure-code only supports non-pic code on armv7-m targets");
2843 /* Recompute the global settings depending on target attribute options. */
2846 arm_option_params_internal (void)
2848 /* If we are not using the default (ARM mode) section anchor offset
2849 ranges, then set the correct ranges now. */
2852 /* Thumb-1 LDR instructions cannot have negative offsets.
2853 Permissible positive offset ranges are 5-bit (for byte loads),
2854 6-bit (for halfword loads), or 7-bit (for word loads).
2855 Empirical results suggest a 7-bit anchor range gives the best
2856 overall code size. */
2857 targetm
.min_anchor_offset
= 0;
2858 targetm
.max_anchor_offset
= 127;
2860 else if (TARGET_THUMB2
)
2862 /* The minimum is set such that the total size of the block
2863 for a particular anchor is 248 + 1 + 4095 bytes, which is
2864 divisible by eight, ensuring natural spacing of anchors. */
2865 targetm
.min_anchor_offset
= -248;
2866 targetm
.max_anchor_offset
= 4095;
2870 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2871 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2876 /* If optimizing for size, bump the number of instructions that we
2877 are prepared to conditionally execute (even on a StrongARM). */
2878 max_insns_skipped
= 6;
2880 /* For THUMB2, we limit the conditional sequence to one IT block. */
2882 max_insns_skipped
= arm_restrict_it
? 1 : 4;
2885 /* When -mrestrict-it is in use tone down the if-conversion. */
2886 max_insns_skipped
= (TARGET_THUMB2
&& arm_restrict_it
)
2887 ? 1 : current_tune
->max_insns_skipped
;
2890 /* True if -mflip-thumb should next add an attribute for the default
2891 mode, false if it should next add an attribute for the opposite mode. */
2892 static GTY(()) bool thumb_flipper
;
2894 /* Options after initial target override. */
2895 static GTY(()) tree init_optimize
;
2898 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2900 if (opts
->x_align_functions
<= 0)
2901 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2902 && opts
->x_optimize_size
? 2 : 4;
2905 /* Implement targetm.override_options_after_change. */
2908 arm_override_options_after_change (void)
2910 arm_override_options_after_change_1 (&global_options
);
2913 /* Reset options between modes that the user has specified. */
2915 arm_option_override_internal (struct gcc_options
*opts
,
2916 struct gcc_options
*opts_set
)
2918 arm_override_options_after_change_1 (opts
);
2920 if (TARGET_INTERWORK
&& !ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
))
2922 /* The default is to enable interworking, so this warning message would
2923 be confusing to users who have just compiled with, eg, -march=armv3. */
2924 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2925 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2928 if (TARGET_THUMB_P (opts
->x_target_flags
)
2929 && !(ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
)))
2931 warning (0, "target CPU does not support THUMB instructions");
2932 opts
->x_target_flags
&= ~MASK_THUMB
;
2935 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2937 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2938 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2941 /* Callee super interworking implies thumb interworking. Adding
2942 this to the flags here simplifies the logic elsewhere. */
2943 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2944 opts
->x_target_flags
|= MASK_INTERWORK
;
2946 /* need to remember initial values so combinaisons of options like
2947 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2948 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2950 if (! opts_set
->x_arm_restrict_it
)
2951 opts
->x_arm_restrict_it
= arm_arch8
;
2953 /* ARM execution state and M profile don't have [restrict] IT. */
2954 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
2955 opts
->x_arm_restrict_it
= 0;
2957 /* Enable -munaligned-access by default for
2958 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2959 i.e. Thumb2 and ARM state only.
2960 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2961 - ARMv8 architecture-base processors.
2963 Disable -munaligned-access by default for
2964 - all pre-ARMv6 architecture-based processors
2965 - ARMv6-M architecture-based processors
2966 - ARMv8-M Baseline processors. */
2968 if (! opts_set
->x_unaligned_access
)
2970 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
2971 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
2973 else if (opts
->x_unaligned_access
== 1
2974 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2976 warning (0, "target CPU does not support unaligned accesses");
2977 opts
->x_unaligned_access
= 0;
2980 /* Don't warn since it's on by default in -O2. */
2981 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2982 opts
->x_flag_schedule_insns
= 0;
2984 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
2986 /* Disable shrink-wrap when optimizing function for size, since it tends to
2987 generate additional returns. */
2988 if (optimize_function_for_size_p (cfun
)
2989 && TARGET_THUMB2_P (opts
->x_target_flags
))
2990 opts
->x_flag_shrink_wrap
= false;
2992 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
2994 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2995 - epilogue_insns - does not accurately model the corresponding insns
2996 emitted in the asm file. In particular, see the comment in thumb_exit
2997 'Find out how many of the (return) argument registers we can corrupt'.
2998 As a consequence, the epilogue may clobber registers without fipa-ra
2999 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3000 TODO: Accurately model clobbers for epilogue_insns and reenable
3002 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3003 opts
->x_flag_ipa_ra
= 0;
3005 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3007 /* Thumb2 inline assembly code should always use unified syntax.
3008 This will apply to ARM and Thumb1 eventually. */
3009 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3011 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3012 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3016 /* Fix up any incompatible options that the user has specified. */
/* NOTE(review): extracted text of arm_option_override (TARGET_OPTION_OVERRIDE
   hook).  The embedded original line numbers jump in places, so some physical
   lines (braces, else-arms, declarations, gcc_asserts) are missing from this
   rendering; do not treat this text as directly compilable.  */
3018 arm_option_override (void)
/* Start from a clean slate: no -march/-mcpu/-mtune selection recorded yet.  */
3020 arm_selected_arch
= NULL
;
3021 arm_selected_cpu
= NULL
;
3022 arm_selected_tune
= NULL
;
/* Record explicit -march=, -mcpu= and -mtune= selections, if any.  */
3024 if (global_options_set
.x_arm_arch_option
)
3025 arm_selected_arch
= &all_architectures
[arm_arch_option
];
3027 if (global_options_set
.x_arm_cpu_option
)
3029 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
3030 arm_selected_tune
= &all_cores
[(int) arm_cpu_option
];
3033 if (global_options_set
.x_arm_tune_option
)
3034 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
3036 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3037 SUBTARGET_OVERRIDE_OPTIONS
;
/* Reconcile -march with -mcpu: warn if they conflict; -march wins for code
   generation, -mcpu wins for default tuning (per the comment below).  */
3040 if (arm_selected_arch
)
3042 if (arm_selected_cpu
)
3044 const arm_feature_set tuning_flags
= ARM_FSET_MAKE_CPU1 (FL_TUNE
);
3045 arm_feature_set selected_flags
;
3046 ARM_FSET_XOR (selected_flags
, arm_selected_cpu
->flags
,
3047 arm_selected_arch
->flags
);
3048 ARM_FSET_EXCLUDE (selected_flags
, selected_flags
, tuning_flags
);
3049 /* Check for conflict between mcpu and march. */
3050 if (!ARM_FSET_IS_EMPTY (selected_flags
))
3052 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3053 arm_selected_cpu
->name
, arm_selected_arch
->name
);
3054 /* -march wins for code generation.
3055 -mcpu wins for default tuning. */
3056 if (!arm_selected_tune
)
3057 arm_selected_tune
= arm_selected_cpu
;
3059 arm_selected_cpu
= arm_selected_arch
;
3063 arm_selected_arch
= NULL
;
3066 /* Pick a CPU based on the architecture. */
3067 arm_selected_cpu
= arm_selected_arch
;
3070 /* If the user did not specify a processor, choose one for them. */
3071 if (!arm_selected_cpu
)
3073 const struct processors
* sel
;
3074 arm_feature_set sought
= ARM_FSET_EMPTY
;;
3076 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
3077 gcc_assert (arm_selected_cpu
->name
);
3079 sel
= arm_selected_cpu
;
3080 insn_flags
= sel
->flags
;
3082 /* Now check to see if the user has specified some command line
3083 switch that require certain abilities from the cpu. */
3085 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3087 ARM_FSET_ADD_CPU1 (sought
, FL_THUMB
);
3088 ARM_FSET_ADD_CPU1 (sought
, FL_MODE32
);
3090 /* There are no ARM processors that support both APCS-26 and
3091 interworking. Therefore we force FL_MODE26 to be removed
3092 from insn_flags here (if it was set), so that the search
3093 below will always be able to find a compatible processor. */
3094 ARM_FSET_DEL_CPU1 (insn_flags
, FL_MODE26
);
3097 if (!ARM_FSET_IS_EMPTY (sought
)
3098 && !(ARM_FSET_CPU_SUBSET (sought
, insn_flags
)))
3100 /* Try to locate a CPU type that supports all of the abilities
3101 of the default CPU, plus the extra abilities requested by
3103 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3104 if (ARM_FSET_CPU_SUBSET (sought
, sel
->flags
))
3107 if (sel
->name
== NULL
)
3109 unsigned current_bit_count
= 0;
3110 const struct processors
* best_fit
= NULL
;
3112 /* Ideally we would like to issue an error message here
3113 saying that it was not possible to find a CPU compatible
3114 with the default CPU, but which also supports the command
3115 line options specified by the programmer, and so they
3116 ought to use the -mcpu=<name> command line option to
3117 override the default CPU type.
3119 If we cannot find a cpu that has both the
3120 characteristics of the default cpu and the given
3121 command line options we scan the array again looking
3122 for a best match. */
3123 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3125 arm_feature_set required
= ARM_FSET_EMPTY
;
3126 ARM_FSET_UNION (required
, sought
, insn_flags
);
3127 if (ARM_FSET_CPU_SUBSET (required
, sel
->flags
))
3130 arm_feature_set flags
;
3131 ARM_FSET_INTER (flags
, sel
->flags
, insn_flags
);
3132 count
= feature_count (&flags
);
3134 if (count
>= current_bit_count
)
3137 current_bit_count
= count
;
3141 gcc_assert (best_fit
);
3145 arm_selected_cpu
= sel
;
3149 gcc_assert (arm_selected_cpu
);
3150 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3151 if (!arm_selected_tune
)
3152 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
/* Commit the selection: derive the architecture name macro, insn_flags and
   base architecture from the chosen CPU, and tuning data from the chosen
   tuning target.  */
3154 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
3155 insn_flags
= arm_selected_cpu
->flags
;
3156 arm_base_arch
= arm_selected_cpu
->base_arch
;
3158 arm_tune
= arm_selected_tune
->core
;
3159 tune_flags
= arm_selected_tune
->flags
;
3160 current_tune
= arm_selected_tune
->tune
;
3162 /* TBD: Dwarf info for apcs frame is not handled yet. */
3163 if (TARGET_APCS_FRAME
)
3164 flag_shrink_wrap
= false;
3166 /* BPABI targets use linker tricks to allow interworking on cores
3167 without thumb support. */
3168 if (TARGET_INTERWORK
3169 && !(ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
) || TARGET_BPABI
))
3171 warning (0, "target CPU does not support interworking" );
3172 target_flags
&= ~MASK_INTERWORK
;
3175 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3177 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3178 target_flags
|= MASK_APCS_FRAME
;
3181 if (TARGET_POKE_FUNCTION_NAME
)
3182 target_flags
|= MASK_APCS_FRAME
;
3184 if (TARGET_APCS_REENT
&& flag_pic
)
3185 error ("-fpic and -mapcs-reent are incompatible");
3187 if (TARGET_APCS_REENT
)
3188 warning (0, "APCS reentrant code not supported. Ignored");
3190 if (TARGET_APCS_FLOAT
)
3191 warning (0, "passing floating point arguments in fp regs not yet supported");
3193 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3194 arm_arch3m
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH3M
);
3195 arm_arch4
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH4
);
3196 arm_arch4t
= arm_arch4
&& (ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
));
3197 arm_arch5
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH5
);
3198 arm_arch5e
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH5E
);
3199 arm_arch6
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6
);
3200 arm_arch6k
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6K
);
3201 arm_arch6kz
= arm_arch6k
&& ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH6KZ
);
3202 arm_arch_notm
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_NOTM
);
3203 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3204 arm_arch7
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH7
);
3205 arm_arch7em
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH7EM
);
3206 arm_arch8
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARCH8
);
3207 arm_arch8_1
= ARM_FSET_HAS_CPU2 (insn_flags
, FL2_ARCH8_1
);
3208 arm_arch8_2
= ARM_FSET_HAS_CPU2 (insn_flags
, FL2_ARCH8_2
);
3209 arm_arch_thumb1
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB
);
3210 arm_arch_thumb2
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB2
);
3211 arm_arch_xscale
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_XSCALE
);
/* The following booleans come from tune_flags, not insn_flags: they steer
   scheduling/tuning decisions rather than instruction availability.  */
3213 arm_ld_sched
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_LDSCHED
);
3214 arm_tune_strongarm
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_STRONG
);
3215 arm_tune_wbuf
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_WBUF
);
3216 arm_tune_xscale
= ARM_FSET_HAS_CPU1 (tune_flags
, FL_XSCALE
);
3217 arm_arch_iwmmxt
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_IWMMXT
);
3218 arm_arch_iwmmxt2
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_IWMMXT2
);
3219 arm_arch_thumb_hwdiv
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_THUMB_DIV
);
3220 arm_arch_arm_hwdiv
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_ARM_DIV
);
3221 arm_arch_no_volatile_ce
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_NO_VOLATILE_CE
);
3222 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
3223 arm_arch_crc
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_CRC32
);
3224 arm_m_profile_small_mul
= ARM_FSET_HAS_CPU1 (insn_flags
, FL_SMALLMUL
);
3225 arm_fp16_inst
= ARM_FSET_HAS_CPU2 (insn_flags
, FL2_FP16INST
);
/* FP16 instructions imply the IEEE __fp16 format; reject the alternative
   format if it was requested together with them.  */
3228 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3229 error ("selected fp16 options are incompatible.");
3230 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3233 /* V5 code we generate is completely interworking capable, so we turn off
3234 TARGET_INTERWORK here to avoid many tests later on. */
3236 /* XXX However, we must pass the right pre-processor defines to CPP
3237 or GLD can get confused. This is a hack. */
3238 if (TARGET_INTERWORK
)
3239 arm_cpp_interwork
= 1;
3242 target_flags
&= ~MASK_INTERWORK
;
3244 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3245 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3247 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3248 error ("iwmmxt abi requires an iwmmxt capable cpu");
/* No -mfpu= given: pick the subtarget default FPU name (FPUTYPE_DEFAULT),
   else fall back to "vfp", and translate it to an FPU table index.  */
3250 if (!global_options_set
.x_arm_fpu_index
)
3252 const char *target_fpu_name
;
3255 #ifdef FPUTYPE_DEFAULT
3256 target_fpu_name
= FPUTYPE_DEFAULT
;
3258 target_fpu_name
= "vfp";
3261 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
3266 /* If soft-float is specified then don't use FPU. */
3267 if (TARGET_SOFT_FLOAT
)
3268 arm_fpu_attr
= FPU_NONE
;
3270 arm_fpu_attr
= FPU_VFP
;
3272 if (TARGET_AAPCS_BASED
)
3274 if (TARGET_CALLER_INTERWORKING
)
3275 error ("AAPCS does not support -mcaller-super-interworking");
3277 if (TARGET_CALLEE_INTERWORKING
)
3278 error ("AAPCS does not support -mcallee-super-interworking");
3281 /* __fp16 support currently assumes the core has ldrh. */
3282 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3283 sorry ("__fp16 and no ldrh");
/* Choose the default procedure calling standard from the ABI and
   float-ABI selections.  */
3285 if (TARGET_AAPCS_BASED
)
3287 if (arm_abi
== ARM_ABI_IWMMXT
)
3288 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3289 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
3290 && TARGET_HARD_FLOAT
)
3291 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3293 arm_pcs_default
= ARM_PCS_AAPCS
;
3297 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3298 sorry ("-mfloat-abi=hard and VFP");
3300 if (arm_abi
== ARM_ABI_APCS
)
3301 arm_pcs_default
= ARM_PCS_APCS
;
3303 arm_pcs_default
= ARM_PCS_ATPCS
;
3306 /* For arm2/3 there is no need to do any scheduling if we are doing
3307 software floating-point. */
3308 if (TARGET_SOFT_FLOAT
&& !ARM_FSET_HAS_CPU1 (tune_flags
, FL_MODE32
))
3309 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3311 /* Use the cp15 method if it is available. */
3312 if (target_thread_pointer
== TP_AUTO
)
3314 if (arm_arch6k
&& !TARGET_THUMB1
)
3315 target_thread_pointer
= TP_CP15
;
3317 target_thread_pointer
= TP_SOFT
;
3320 /* Override the default structure alignment for AAPCS ABI. */
3321 if (!global_options_set
.x_arm_structure_size_boundary
)
3323 if (TARGET_AAPCS_BASED
)
3324 arm_structure_size_boundary
= 8;
3328 if (arm_structure_size_boundary
!= 8
3329 && arm_structure_size_boundary
!= 32
3330 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3332 if (ARM_DOUBLEWORD_ALIGN
)
3334 "structure size boundary can only be set to 8, 32 or 64");
3336 warning (0, "structure size boundary can only be set to 8 or 32");
3337 arm_structure_size_boundary
3338 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3342 if (TARGET_VXWORKS_RTP
)
3344 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3345 arm_pic_data_is_text_relative
= 0;
3348 && !arm_pic_data_is_text_relative
3349 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3350 /* When text & data segments don't have a fixed displacement, the
3351 intended use is with a single, read only, pic base register.
3352 Unless the user explicitly requested not to do that, set
3354 target_flags
|= MASK_SINGLE_PIC_BASE
;
3356 /* If stack checking is disabled, we can use r10 as the PIC register,
3357 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3358 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3360 if (TARGET_VXWORKS_RTP
)
3361 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3362 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3365 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3366 arm_pic_register
= 9;
3368 if (arm_pic_register_string
!= NULL
)
3370 int pic_register
= decode_reg_name (arm_pic_register_string
);
3373 warning (0, "-mpic-register= is useless without -fpic");
3375 /* Prevent the user from choosing an obviously stupid PIC register. */
3376 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3377 || pic_register
== HARD_FRAME_POINTER_REGNUM
3378 || pic_register
== STACK_POINTER_REGNUM
3379 || pic_register
>= PC_REGNUM
3380 || (TARGET_VXWORKS_RTP
3381 && (unsigned int) pic_register
!= arm_pic_register
))
3382 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3384 arm_pic_register
= pic_register
;
3387 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3388 if (fix_cm3_ldrd
== 2)
3390 if (arm_selected_cpu
->core
== cortexm3
)
3396 /* Hot/Cold partitioning is not currently supported, since we can't
3397 handle literal pool placement in that case. */
3398 if (flag_reorder_blocks_and_partition
)
3400 inform (input_location
,
3401 "-freorder-blocks-and-partition not supported on this architecture");
3402 flag_reorder_blocks_and_partition
= 0;
3403 flag_reorder_blocks
= 1;
3407 /* Hoisting PIC address calculations more aggressively provides a small,
3408 but measurable, size reduction for PIC code. Therefore, we decrease
3409 the bar for unrestricted expression hoisting to the cost of PIC address
3410 calculation, which is 2 instructions. */
3411 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3412 global_options
.x_param_values
,
3413 global_options_set
.x_param_values
);
3415 /* ARM EABI defaults to strict volatile bitfields. */
3416 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3417 && abi_version_at_least(2))
3418 flag_strict_volatile_bitfields
= 1;
3420 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3421 have deemed it beneficial (signified by setting
3422 prefetch.num_slots to 1 or more). */
3423 if (flag_prefetch_loop_arrays
< 0
3426 && current_tune
->prefetch
.num_slots
> 0)
3427 flag_prefetch_loop_arrays
= 1;
3429 /* Set up parameters to be used in prefetching algorithm. Do not
3430 override the defaults unless we are tuning for a core we have
3431 researched values for. */
3432 if (current_tune
->prefetch
.num_slots
> 0)
3433 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3434 current_tune
->prefetch
.num_slots
,
3435 global_options
.x_param_values
,
3436 global_options_set
.x_param_values
);
3437 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3438 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3439 current_tune
->prefetch
.l1_cache_line_size
,
3440 global_options
.x_param_values
,
3441 global_options_set
.x_param_values
);
3442 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3443 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3444 current_tune
->prefetch
.l1_cache_size
,
3445 global_options
.x_param_values
,
3446 global_options_set
.x_param_values
);
3448 /* Use Neon to perform 64-bits operations rather than core
3450 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3451 if (use_neon_for_64bits
== 1)
3452 prefer_neon_for_64bits
= true;
3454 /* Use the alternative scheduling-pressure algorithm by default. */
3455 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3456 global_options
.x_param_values
,
3457 global_options_set
.x_param_values
);
3459 /* Look through ready list and all of queue for instructions
3460 relevant for L2 auto-prefetcher. */
3461 int param_sched_autopref_queue_depth
;
/* Translate the tuning structure's auto-prefetcher preference into the
   scheduler's queue-depth --param value.  */
3463 switch (current_tune
->sched_autopref
)
3465 case tune_params::SCHED_AUTOPREF_OFF
:
3466 param_sched_autopref_queue_depth
= -1;
3469 case tune_params::SCHED_AUTOPREF_RANK
:
3470 param_sched_autopref_queue_depth
= 0;
3473 case tune_params::SCHED_AUTOPREF_FULL
:
3474 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3481 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3482 param_sched_autopref_queue_depth
,
3483 global_options
.x_param_values
,
3484 global_options_set
.x_param_values
);
3486 /* Currently, for slow flash data, we just disable literal pools. We also
3487 disable it for pure-code. */
3488 if (target_slow_flash_data
|| target_pure_code
)
3489 arm_disable_literal_pool
= true;
3491 /* Disable scheduling fusion by default if it's not armv7 processor
3492 or doesn't prefer ldrd/strd. */
3493 if (flag_schedule_fusion
== 2
3494 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3495 flag_schedule_fusion
= 0;
3497 /* Need to remember initial options before they are overriden. */
3498 init_optimize
= build_optimization_node (&global_options
);
3500 arm_option_override_internal (&global_options
, &global_options_set
);
3501 arm_option_check_internal (&global_options
);
3502 arm_option_params_internal ();
3504 /* Register global variables with the garbage collector. */
3505 arm_add_gc_roots ();
3507 /* Save the initial options in case the user does function specific
3508 options or #pragma target. */
3509 target_option_default_node
= target_option_current_node
3510 = build_target_option_node (&global_options
);
3512 /* Init initial mode for testing. */
3513 thumb_flipper
= TARGET_THUMB
;
3517 arm_add_gc_roots (void)
3519 gcc_obstack_init(&minipool_obstack
);
3520 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3523 /* A table of known ARM exception types.
3524 For use with the interrupt function attribute. */
3528 const char *const arg
;
3529 const unsigned long return_value
;
3533 static const isr_attribute_arg isr_attribute_args
[] =
3535 { "IRQ", ARM_FT_ISR
},
3536 { "irq", ARM_FT_ISR
},
3537 { "FIQ", ARM_FT_FIQ
},
3538 { "fiq", ARM_FT_FIQ
},
3539 { "ABORT", ARM_FT_ISR
},
3540 { "abort", ARM_FT_ISR
},
3541 { "ABORT", ARM_FT_ISR
},
3542 { "abort", ARM_FT_ISR
},
3543 { "UNDEF", ARM_FT_EXCEPTION
},
3544 { "undef", ARM_FT_EXCEPTION
},
3545 { "SWI", ARM_FT_EXCEPTION
},
3546 { "swi", ARM_FT_EXCEPTION
},
3547 { NULL
, ARM_FT_NORMAL
}
3550 /* Returns the (interrupt) function type of the current
3551 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3553 static unsigned long
3554 arm_isr_value (tree argument
)
3556 const isr_attribute_arg
* ptr
;
3560 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3562 /* No argument - default to IRQ. */
3563 if (argument
== NULL_TREE
)
3566 /* Get the value of the argument. */
3567 if (TREE_VALUE (argument
) == NULL_TREE
3568 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3569 return ARM_FT_UNKNOWN
;
3571 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3573 /* Check it against the list of known arguments. */
3574 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3575 if (streq (arg
, ptr
->arg
))
3576 return ptr
->return_value
;
3578 /* An unrecognized interrupt type. */
3579 return ARM_FT_UNKNOWN
;
3582 /* Computes the type of the current function. */
/* NOTE(review): extracted text; the original numbering jumps (3596 -> 3598,
   3599 -> 3601, 3608 -> 3610, ...), so the opening of the volatile-function
   condition, the local declarations for 'a' and 'attr', the attribute-test
   guards, and the final return are missing from this rendering.  */
3584 static unsigned long
3585 arm_compute_func_type (void)
/* Accumulates ARM_FT_* bits describing the current function.  */
3587 unsigned long type
= ARM_FT_UNKNOWN
;
3591 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
)
;
3593 /* Decide if the current function is volatile. Such functions
3594 never return, and many memory cycles can be saved by not storing
3595 register values that will never be needed again. This optimization
3596 was added to speed up context switching in a kernel application. */
3598 && (TREE_NOTHROW (current_function_decl
)
3599 || !(flag_unwind_tables
3601 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3602 && TREE_THIS_VOLATILE (current_function_decl
))
3603 type
|= ARM_FT_VOLATILE
;
/* Functions with a static chain are flagged as nested.  */
3605 if (cfun
->static_chain_decl
!= NULL
)
3606 type
|= ARM_FT_NESTED
;
3608 attr
= DECL_ATTRIBUTES (current_function_decl
);
3610 a
= lookup_attribute ("naked", attr
);
3612 type
|= ARM_FT_NAKED
;
/* "isr" and "interrupt" are synonymous attributes here.  */
3614 a
= lookup_attribute ("isr", attr
);
3616 a
= lookup_attribute ("interrupt", attr
);
/* No interrupt attribute: a plain or interworked function; otherwise map
   the attribute argument via arm_isr_value.  */
3619 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3621 type
|= arm_isr_value (TREE_VALUE (a
));
3626 /* Returns the type of the current function. */
3629 arm_current_func_type (void)
3631 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3632 cfun
->machine
->func_type
= arm_compute_func_type ();
3634 return cfun
->machine
->func_type
;
3638 arm_allocate_stack_slots_for_args (void)
3640 /* Naked functions should not allocate stack slots for arguments. */
3641 return !IS_NAKED (arm_current_func_type ());
3645 arm_warn_func_return (tree decl
)
3647 /* Naked functions are implemented entirely in assembly, including the
3648 return sequence, so suppress warnings about this. */
3649 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3653 /* Output assembler code for a block containing the constant parts
3654 of a trampoline, leaving space for the variable parts.
3656 On the ARM, (if r8 is the static chain regnum, and remembering that
3657 referencing pc adds an offset of 8) the trampoline looks like:
3660 .word static chain value
3661 .word function's address
3662 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3665 arm_asm_trampoline_template (FILE *f
)
3667 fprintf (f
, "\t.syntax unified\n");
3671 fprintf (f
, "\t.arm\n");
3672 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3673 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3675 else if (TARGET_THUMB2
)
3677 fprintf (f
, "\t.thumb\n");
3678 /* The Thumb-2 trampoline is similar to the arm implementation.
3679 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3680 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3681 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3682 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3686 ASM_OUTPUT_ALIGN (f
, 2);
3687 fprintf (f
, "\t.code\t16\n");
3688 fprintf (f
, ".Ltrampoline_start:\n");
3689 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3690 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3691 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3692 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3693 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3694 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3696 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3697 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3700 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3703 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3705 rtx fnaddr
, mem
, a_tramp
;
3707 emit_block_move (m_tramp
, assemble_trampoline_template (),
3708 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3710 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3711 emit_move_insn (mem
, chain_value
);
3713 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3714 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3715 emit_move_insn (mem
, fnaddr
);
3717 a_tramp
= XEXP (m_tramp
, 0);
3718 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3719 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3720 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3723 /* Thumb trampolines should be entered in thumb mode, so set
3724 the bottom bit of the address. */
3727 arm_trampoline_adjust_address (rtx addr
)
3730 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3731 NULL
, 0, OPTAB_LIB_WIDEN
);
3735 /* Return 1 if it is possible to return using a single instruction.
3736 If SIBLING is non-null, this is a test for a return before a sibling
3737 call. SIBLING is the call insn, so we can examine its register usage. */
/* NOTE(review): extracted text; the original line numbers jump, so several
   physical lines (early 'return FALSE/TRUE' statements, the 'int regno;'
   declaration, braces and some guards) are missing from this rendering.
   Each test below is a veto: when it fires, a single-insn return is not
   possible.  */
3740 use_return_insn (int iscond
, rtx sibling
)
3743 unsigned int func_type
;
3744 unsigned long saved_int_regs
;
3745 unsigned HOST_WIDE_INT stack_adjust
;
3746 arm_stack_offsets
*offsets
;
3748 /* Never use a return instruction before reload has run. */
3749 if (!reload_completed
)
3752 func_type
= arm_current_func_type ();
3754 /* Naked, volatile and stack alignment functions need special
3756 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3759 /* So do interrupt functions that use the frame pointer and Thumb
3760 interrupt functions. */
3761 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3764 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3765 && !optimize_function_for_size_p (cfun
))
3768 offsets
= arm_get_frame_offsets ();
3769 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3771 /* As do variadic functions. */
3772 if (crtl
->args
.pretend_args_size
3773 || cfun
->machine
->uses_anonymous_args
3774 /* Or if the function calls __builtin_eh_return () */
3775 || crtl
->calls_eh_return
3776 /* Or if the function calls alloca */
3777 || cfun
->calls_alloca
3778 /* Or if there is a stack adjustment. However, if the stack pointer
3779 is saved on the stack, we can use a pre-incrementing stack load. */
3780 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3781 && stack_adjust
== 4))
3782 /* Or if the static chain register was saved above the frame, under the
3783 assumption that the stack pointer isn't saved on the stack. */
3784 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
3785 && arm_compute_static_chain_stack_bytes() != 0))
3788 saved_int_regs
= offsets
->saved_regs_mask
;
3790 /* Unfortunately, the insn
3792 ldmib sp, {..., sp, ...}
3794 triggers a bug on most SA-110 based devices, such that the stack
3795 pointer won't be correctly restored if the instruction takes a
3796 page fault. We work around this problem by popping r3 along with
3797 the other registers, since that is never slower than executing
3798 another instruction.
3800 We test for !arm_arch5 here, because code for any architecture
3801 less than this could potentially be run on one of the buggy
3803 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3805 /* Validate that r3 is a call-clobbered register (always true in
3806 the default abi) ... */
3807 if (!call_used_regs
[3])
3810 /* ... that it isn't being used for a return value ... */
3811 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3814 /* ... or for a tail-call argument ... */
3817 gcc_assert (CALL_P (sibling
));
3819 if (find_regno_fusage (sibling
, USE
, 3))
3823 /* ... and that there are no call-saved registers in r0-r2
3824 (always true in the default ABI). */
3825 if (saved_int_regs
& 0x7)
3829 /* Can't be done if interworking with Thumb, and any registers have been
3831 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3834 /* On StrongARM, conditional returns are expensive if they aren't
3835 taken and multiple registers have been stacked. */
3836 if (iscond
&& arm_tune_strongarm
)
3838 /* Conditional return when just the LR is stored is a simple
3839 conditional-load instruction, that's not expensive. */
3840 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3844 && arm_pic_register
!= INVALID_REGNUM
3845 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
3849 /* If there are saved registers but the LR isn't saved, then we need
3850 two instructions for the return. */
3851 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
3854 /* Can't be done if any of the VFP regs are pushed,
3855 since this also requires an insn. */
3856 if (TARGET_HARD_FLOAT
)
3857 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
3858 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
/* Likewise for saved iWMMXt registers.  */
3861 if (TARGET_REALLY_IWMMXT
)
3862 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
3863 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
3869 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3870 shrink-wrapping if possible. This is the case if we need to emit a
3871 prologue, which we can test by looking at the offsets. */
3873 use_simple_return_p (void)
3875 arm_stack_offsets
*offsets
;
3877 offsets
= arm_get_frame_offsets ();
3878 return offsets
->outgoing_args
!= 0;
3881 /* Return TRUE if int I is a valid immediate ARM constant. */
/* NOTE(review): extracted text; the original numbering jumps (e.g. 3893 ->
   3896, 3900 -> 3903, 3907 -> 3911, 3920 -> 3927), so the declaration of
   'lowbit', the TRUE/FALSE returns, the ARM/Thumb-2 mode guards and the
   repeated-pattern variable assignments are missing from this rendering.
   The immediate tests follow the ARM rotated-8-bit / Thumb-2 replicated
   encodings.  */
3884 const_ok_for_arm (HOST_WIDE_INT i
)
3888 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3889 be all zero, or all one. */
3890 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
3891 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
3892 != ((~(unsigned HOST_WIDE_INT
) 0)
3893 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
3896 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
3898 /* Fast return for 0 and small values. We must do this for zero, since
3899 the code below can't handle that one case. */
3900 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
3903 /* Get the number of trailing zeros. */
3904 lowbit
= ffs((int) i
) - 1;
3906 /* Only even shifts are allowed in ARM mode so round down to the
3907 nearest even number. */
3911 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
3916 /* Allow rotated constants in ARM mode. */
3918 && ((i
& ~0xc000003f) == 0
3919 || (i
& ~0xf000000f) == 0
3920 || (i
& ~0xfc000003) == 0))
3927 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3930 if (i
== v
|| i
== (v
| (v
<< 8)))
3933 /* Allow repeated pattern 0xXY00XY00. */
3943 /* Return true if I is a valid constant for the operation CODE. */
/* NOTE(review): extracted text; the original numbering jumps (3964 -> 3985
   -> 3987 -> 3993 -> 3997), so the switch statement, most case labels
   (presumably SET/PLUS/IOR/XOR/AND/AND-complement variants) and several
   returns are missing from this rendering.  The strategy visible here:
   accept constants directly encodable, else try the movw/addw/subw forms,
   else try the negated/inverted constant for the complementary insn.  */
3945 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
3947 if (const_ok_for_arm (i
))
3953 /* See if we can use movw. */
3954 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
3957 /* Otherwise, try mvn. */
3958 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3961 /* See if we can use addw or subw. */
3963 && ((i
& 0xfffff000) == 0
3964 || ((-i
) & 0xfffff000) == 0))
3985 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
3987 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
3993 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
3997 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4004 /* Return true if I is a valid di mode constant for the operation CODE. */
4006 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4008 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4009 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4010 rtx hi
= GEN_INT (hi_val
);
4011 rtx lo
= GEN_INT (lo_val
);
4021 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4022 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4024 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4031 /* Emit a sequence of insns to handle a large constant.
4032 CODE is the code of the operation required, it can be any of SET, PLUS,
4033 IOR, AND, XOR, MINUS;
4034 MODE is the mode in which the operation is being performed;
4035 VAL is the integer to operate on;
4036 SOURCE is the other operand (a register, or a null-pointer for SET);
4037 SUBTARGETS means it is safe to create scratch registers if that will
4038 either produce a simpler sequence, or we will want to cse the values.
4039 Return value is the number of insns emitted. */
/* NOTE(review): extracted text; the original numbering jumps (e.g. 4044 ->
   4048, 4066 -> 4068, 4070 -> 4075), so the declaration of 'cond', the
   else-initialization, some guards and braces are missing from this
   rendering.  The shape: if synthesizing in-line would exceed the constant
   cost limit, load VAL into a (sub)target register via movw/movt or a
   constant-pool set, apply CODE, and otherwise defer to arm_gen_constant.  */
4041 /* ??? Tweak this for thumb2. */
4043 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4044 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
/* Preserve the predicate of a conditionally-executed insn.  */
4048 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4049 cond
= COND_EXEC_TEST (PATTERN (insn
));
4053 if (subtargets
|| code
== SET
4054 || (REG_P (target
) && REG_P (source
)
4055 && REGNO (target
) != REGNO (source
)))
4057 /* After arm_reorg has been called, we can't fix up expensive
4058 constants by pushing them into memory so we must synthesize
4059 them in-line, regardless of the cost. This is only likely to
4060 be more costly on chips that have load delay slots and we are
4061 compiling without running the scheduler (so no splitting
4062 occurred before the final instruction emission).
4064 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4066 if (!cfun
->machine
->after_arm_reorg
4068 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4070 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4075 /* Currently SET is the only monadic value for CODE, all
4076 the rest are diadic. */
4077 if (TARGET_USE_MOVT
)
4078 arm_emit_movpair (target
, GEN_INT (val
));
4080 emit_set_insn (target
, GEN_INT (val
));
4086 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4088 if (TARGET_USE_MOVT
)
4089 arm_emit_movpair (temp
, GEN_INT (val
));
4091 emit_set_insn (temp
, GEN_INT (val
));
4093 /* For MINUS, the value is subtracted from, since we never
4094 have subtraction of a constant. */
4096 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4098 emit_set_insn (target
,
4099 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4105 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4109 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4110 ARM/THUMB2 immediates, and add up to VAL.
4111 Thr function return value gives the number of insns required. */
4113 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4114 struct four_ints
*return_sequence
)
4116 int best_consecutive_zeros
= 0;
4120 struct four_ints tmp_sequence
;
4122 /* If we aren't targeting ARM, the best place to start is always at
4123 the bottom, otherwise look more closely. */
4126 for (i
= 0; i
< 32; i
+= 2)
4128 int consecutive_zeros
= 0;
4130 if (!(val
& (3 << i
)))
4132 while ((i
< 32) && !(val
& (3 << i
)))
4134 consecutive_zeros
+= 2;
4137 if (consecutive_zeros
> best_consecutive_zeros
)
4139 best_consecutive_zeros
= consecutive_zeros
;
4140 best_start
= i
- consecutive_zeros
;
4147 /* So long as it won't require any more insns to do so, it's
4148 desirable to emit a small constant (in bits 0...9) in the last
4149 insn. This way there is more chance that it can be combined with
4150 a later addressing insn to form a pre-indexed load or store
4151 operation. Consider:
4153 *((volatile int *)0xe0000100) = 1;
4154 *((volatile int *)0xe0000110) = 2;
4156 We want this to wind up as:
4160 str rB, [rA, #0x100]
4162 str rB, [rA, #0x110]
4164 rather than having to synthesize both large constants from scratch.
4166 Therefore, we calculate how many insns would be required to emit
4167 the constant starting from `best_start', and also starting from
4168 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4169 yield a shorter sequence, we may as well use zero. */
4170 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4172 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4174 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4175 if (insns2
<= insns1
)
4177 *return_sequence
= tmp_sequence
;
4185 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4187 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4188 struct four_ints
*return_sequence
, int i
)
4190 int remainder
= val
& 0xffffffff;
4193 /* Try and find a way of doing the job in either two or three
4196 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4197 location. We start at position I. This may be the MSB, or
4198 optimial_immediate_sequence may have positioned it at the largest block
4199 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4200 wrapping around to the top of the word when we drop off the bottom.
4201 In the worst case this code should produce no more than four insns.
4203 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4204 constants, shifted to any arbitrary location. We should always start
4209 unsigned int b1
, b2
, b3
, b4
;
4210 unsigned HOST_WIDE_INT result
;
4213 gcc_assert (insns
< 4);
4218 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4219 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4222 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4223 /* We can use addw/subw for the last 12 bits. */
4227 /* Use an 8-bit shifted/rotated immediate. */
4231 result
= remainder
& ((0x0ff << end
)
4232 | ((i
< end
) ? (0xff >> (32 - end
))
4239 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4240 arbitrary shifts. */
4241 i
-= TARGET_ARM
? 2 : 1;
4245 /* Next, see if we can do a better job with a thumb2 replicated
4248 We do it this way around to catch the cases like 0x01F001E0 where
4249 two 8-bit immediates would work, but a replicated constant would
4252 TODO: 16-bit constants that don't clear all the bits, but still win.
4253 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4256 b1
= (remainder
& 0xff000000) >> 24;
4257 b2
= (remainder
& 0x00ff0000) >> 16;
4258 b3
= (remainder
& 0x0000ff00) >> 8;
4259 b4
= remainder
& 0xff;
4263 /* The 8-bit immediate already found clears b1 (and maybe b2),
4264 but must leave b3 and b4 alone. */
4266 /* First try to find a 32-bit replicated constant that clears
4267 almost everything. We can assume that we can't do it in one,
4268 or else we wouldn't be here. */
4269 unsigned int tmp
= b1
& b2
& b3
& b4
;
4270 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4272 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4273 + (tmp
== b3
) + (tmp
== b4
);
4275 && (matching_bytes
>= 3
4276 || (matching_bytes
== 2
4277 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4279 /* At least 3 of the bytes match, and the fourth has at
4280 least as many bits set, or two of the bytes match
4281 and it will only require one more insn to finish. */
4289 /* Second, try to find a 16-bit replicated constant that can
4290 leave three of the bytes clear. If b2 or b4 is already
4291 zero, then we can. If the 8-bit from above would not
4292 clear b2 anyway, then we still win. */
4293 else if (b1
== b3
&& (!b2
|| !b4
4294 || (remainder
& 0x00ff0000 & ~result
)))
4296 result
= remainder
& 0xff00ff00;
4302 /* The 8-bit immediate already found clears b2 (and maybe b3)
4303 and we don't get here unless b1 is alredy clear, but it will
4304 leave b4 unchanged. */
4306 /* If we can clear b2 and b4 at once, then we win, since the
4307 8-bits couldn't possibly reach that far. */
4310 result
= remainder
& 0x00ff00ff;
4316 return_sequence
->i
[insns
++] = result
;
4317 remainder
&= ~result
;
4319 if (code
== SET
|| code
== MINUS
)
4327 /* Emit an instruction with the indicated PATTERN. If COND is
4328 non-NULL, conditionalize the execution of the instruction on COND
4332 emit_constant_insn (rtx cond
, rtx pattern
)
4335 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4336 emit_insn (pattern
);
4339 /* As above, but extra parameter GENERATE which, if clear, suppresses
4343 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4344 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4345 int subtargets
, int generate
)
4349 int final_invert
= 0;
4351 int set_sign_bit_copies
= 0;
4352 int clear_sign_bit_copies
= 0;
4353 int clear_zero_bit_copies
= 0;
4354 int set_zero_bit_copies
= 0;
4355 int insns
= 0, neg_insns
, inv_insns
;
4356 unsigned HOST_WIDE_INT temp1
, temp2
;
4357 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4358 struct four_ints
*immediates
;
4359 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4361 /* Find out which operations are safe for a given CODE. Also do a quick
4362 check for degenerate cases; these can occur when DImode operations
4375 if (remainder
== 0xffffffff)
4378 emit_constant_insn (cond
,
4379 gen_rtx_SET (target
,
4380 GEN_INT (ARM_SIGN_EXTEND (val
))));
4386 if (reload_completed
&& rtx_equal_p (target
, source
))
4390 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4399 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4402 if (remainder
== 0xffffffff)
4404 if (reload_completed
&& rtx_equal_p (target
, source
))
4407 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4416 if (reload_completed
&& rtx_equal_p (target
, source
))
4419 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4423 if (remainder
== 0xffffffff)
4426 emit_constant_insn (cond
,
4427 gen_rtx_SET (target
,
4428 gen_rtx_NOT (mode
, source
)));
4435 /* We treat MINUS as (val - source), since (source - val) is always
4436 passed as (source + (-val)). */
4440 emit_constant_insn (cond
,
4441 gen_rtx_SET (target
,
4442 gen_rtx_NEG (mode
, source
)));
4445 if (const_ok_for_arm (val
))
4448 emit_constant_insn (cond
,
4449 gen_rtx_SET (target
,
4450 gen_rtx_MINUS (mode
, GEN_INT (val
),
4461 /* If we can do it in one insn get out quickly. */
4462 if (const_ok_for_op (val
, code
))
4465 emit_constant_insn (cond
,
4466 gen_rtx_SET (target
,
4468 ? gen_rtx_fmt_ee (code
, mode
, source
,
4474 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4476 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4477 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4481 if (mode
== SImode
&& i
== 16)
4482 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4484 emit_constant_insn (cond
,
4485 gen_zero_extendhisi2
4486 (target
, gen_lowpart (HImode
, source
)));
4488 /* Extz only supports SImode, but we can coerce the operands
4490 emit_constant_insn (cond
,
4491 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4492 gen_lowpart (SImode
, source
),
4493 GEN_INT (i
), const0_rtx
));
4499 /* Calculate a few attributes that may be useful for specific
4501 /* Count number of leading zeros. */
4502 for (i
= 31; i
>= 0; i
--)
4504 if ((remainder
& (1 << i
)) == 0)
4505 clear_sign_bit_copies
++;
4510 /* Count number of leading 1's. */
4511 for (i
= 31; i
>= 0; i
--)
4513 if ((remainder
& (1 << i
)) != 0)
4514 set_sign_bit_copies
++;
4519 /* Count number of trailing zero's. */
4520 for (i
= 0; i
<= 31; i
++)
4522 if ((remainder
& (1 << i
)) == 0)
4523 clear_zero_bit_copies
++;
4528 /* Count number of trailing 1's. */
4529 for (i
= 0; i
<= 31; i
++)
4531 if ((remainder
& (1 << i
)) != 0)
4532 set_zero_bit_copies
++;
4540 /* See if we can do this by sign_extending a constant that is known
4541 to be negative. This is a good, way of doing it, since the shift
4542 may well merge into a subsequent insn. */
4543 if (set_sign_bit_copies
> 1)
4545 if (const_ok_for_arm
4546 (temp1
= ARM_SIGN_EXTEND (remainder
4547 << (set_sign_bit_copies
- 1))))
4551 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4552 emit_constant_insn (cond
,
4553 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4554 emit_constant_insn (cond
,
4555 gen_ashrsi3 (target
, new_src
,
4556 GEN_INT (set_sign_bit_copies
- 1)));
4560 /* For an inverted constant, we will need to set the low bits,
4561 these will be shifted out of harm's way. */
4562 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4563 if (const_ok_for_arm (~temp1
))
4567 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4568 emit_constant_insn (cond
,
4569 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4570 emit_constant_insn (cond
,
4571 gen_ashrsi3 (target
, new_src
,
4572 GEN_INT (set_sign_bit_copies
- 1)));
4578 /* See if we can calculate the value as the difference between two
4579 valid immediates. */
4580 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4582 int topshift
= clear_sign_bit_copies
& ~1;
4584 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4585 & (0xff000000 >> topshift
));
4587 /* If temp1 is zero, then that means the 9 most significant
4588 bits of remainder were 1 and we've caused it to overflow.
4589 When topshift is 0 we don't need to do anything since we
4590 can borrow from 'bit 32'. */
4591 if (temp1
== 0 && topshift
!= 0)
4592 temp1
= 0x80000000 >> (topshift
- 1);
4594 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4596 if (const_ok_for_arm (temp2
))
4600 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4601 emit_constant_insn (cond
,
4602 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4603 emit_constant_insn (cond
,
4604 gen_addsi3 (target
, new_src
,
4612 /* See if we can generate this by setting the bottom (or the top)
4613 16 bits, and then shifting these into the other half of the
4614 word. We only look for the simplest cases, to do more would cost
4615 too much. Be careful, however, not to generate this when the
4616 alternative would take fewer insns. */
4617 if (val
& 0xffff0000)
4619 temp1
= remainder
& 0xffff0000;
4620 temp2
= remainder
& 0x0000ffff;
4622 /* Overlaps outside this range are best done using other methods. */
4623 for (i
= 9; i
< 24; i
++)
4625 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4626 && !const_ok_for_arm (temp2
))
4628 rtx new_src
= (subtargets
4629 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4631 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4632 source
, subtargets
, generate
);
4640 gen_rtx_ASHIFT (mode
, source
,
4647 /* Don't duplicate cases already considered. */
4648 for (i
= 17; i
< 24; i
++)
4650 if (((temp1
| (temp1
>> i
)) == remainder
)
4651 && !const_ok_for_arm (temp1
))
4653 rtx new_src
= (subtargets
4654 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4656 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4657 source
, subtargets
, generate
);
4662 gen_rtx_SET (target
,
4665 gen_rtx_LSHIFTRT (mode
, source
,
4676 /* If we have IOR or XOR, and the constant can be loaded in a
4677 single instruction, and we can find a temporary to put it in,
4678 then this can be done in two instructions instead of 3-4. */
4680 /* TARGET can't be NULL if SUBTARGETS is 0 */
4681 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4683 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4687 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4689 emit_constant_insn (cond
,
4690 gen_rtx_SET (sub
, GEN_INT (val
)));
4691 emit_constant_insn (cond
,
4692 gen_rtx_SET (target
,
4693 gen_rtx_fmt_ee (code
, mode
,
4704 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4705 and the remainder 0s for e.g. 0xfff00000)
4706 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4708 This can be done in 2 instructions by using shifts with mov or mvn.
4713 mvn r0, r0, lsr #12 */
4714 if (set_sign_bit_copies
> 8
4715 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4719 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4720 rtx shift
= GEN_INT (set_sign_bit_copies
);
4726 gen_rtx_ASHIFT (mode
,
4731 gen_rtx_SET (target
,
4733 gen_rtx_LSHIFTRT (mode
, sub
,
4740 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4742 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4744 For eg. r0 = r0 | 0xfff
4749 if (set_zero_bit_copies
> 8
4750 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4754 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4755 rtx shift
= GEN_INT (set_zero_bit_copies
);
4761 gen_rtx_LSHIFTRT (mode
,
4766 gen_rtx_SET (target
,
4768 gen_rtx_ASHIFT (mode
, sub
,
4774 /* This will never be reached for Thumb2 because orn is a valid
4775 instruction. This is for Thumb1 and the ARM 32 bit cases.
4777 x = y | constant (such that ~constant is a valid constant)
4779 x = ~(~y & ~constant).
4781 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4785 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4786 emit_constant_insn (cond
,
4788 gen_rtx_NOT (mode
, source
)));
4791 sub
= gen_reg_rtx (mode
);
4792 emit_constant_insn (cond
,
4794 gen_rtx_AND (mode
, source
,
4796 emit_constant_insn (cond
,
4797 gen_rtx_SET (target
,
4798 gen_rtx_NOT (mode
, sub
)));
4805 /* See if two shifts will do 2 or more insn's worth of work. */
4806 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4808 HOST_WIDE_INT shift_mask
= ((0xffffffff
4809 << (32 - clear_sign_bit_copies
))
4812 if ((remainder
| shift_mask
) != 0xffffffff)
4814 HOST_WIDE_INT new_val
4815 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4819 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4820 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4821 new_src
, source
, subtargets
, 1);
4826 rtx targ
= subtargets
? NULL_RTX
: target
;
4827 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4828 targ
, source
, subtargets
, 0);
4834 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4835 rtx shift
= GEN_INT (clear_sign_bit_copies
);
4837 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
4838 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
4844 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
4846 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
4848 if ((remainder
| shift_mask
) != 0xffffffff)
4850 HOST_WIDE_INT new_val
4851 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4854 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4856 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4857 new_src
, source
, subtargets
, 1);
4862 rtx targ
= subtargets
? NULL_RTX
: target
;
4864 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4865 targ
, source
, subtargets
, 0);
4871 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4872 rtx shift
= GEN_INT (clear_zero_bit_copies
);
4874 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
4875 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
4887 /* Calculate what the instruction sequences would be if we generated it
4888 normally, negated, or inverted. */
4890 /* AND cannot be split into multiple insns, so invert and use BIC. */
4893 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
4896 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
4901 if (can_invert
|| final_invert
)
4902 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
4907 immediates
= &pos_immediates
;
4909 /* Is the negated immediate sequence more efficient? */
4910 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
4913 immediates
= &neg_immediates
;
4918 /* Is the inverted immediate sequence more efficient?
4919 We must allow for an extra NOT instruction for XOR operations, although
4920 there is some chance that the final 'mvn' will get optimized later. */
4921 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
4924 immediates
= &inv_immediates
;
4932 /* Now output the chosen sequence as instructions. */
4935 for (i
= 0; i
< insns
; i
++)
4937 rtx new_src
, temp1_rtx
;
4939 temp1
= immediates
->i
[i
];
4941 if (code
== SET
|| code
== MINUS
)
4942 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
4943 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
4944 new_src
= gen_reg_rtx (mode
);
4950 else if (can_negate
)
4953 temp1
= trunc_int_for_mode (temp1
, mode
);
4954 temp1_rtx
= GEN_INT (temp1
);
4958 else if (code
== MINUS
)
4959 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
4961 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
4963 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
4968 can_negate
= can_invert
;
4972 else if (code
== MINUS
)
4980 emit_constant_insn (cond
, gen_rtx_SET (target
,
4981 gen_rtx_NOT (mode
, source
)));
4988 /* Canonicalize a comparison so that we are more likely to recognize it.
4989 This can be done for a few constant compares, where we can make the
4990 immediate value easier to load. */
4993 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
4994 bool op0_preserve_value
)
4997 unsigned HOST_WIDE_INT i
, maxval
;
4999 mode
= GET_MODE (*op0
);
5000 if (mode
== VOIDmode
)
5001 mode
= GET_MODE (*op1
);
5003 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5005 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5006 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5007 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5008 for GTU/LEU in Thumb mode. */
5012 if (*code
== GT
|| *code
== LE
5013 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5015 /* Missing comparison. First try to use an available
5017 if (CONST_INT_P (*op1
))
5025 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5027 *op1
= GEN_INT (i
+ 1);
5028 *code
= *code
== GT
? GE
: LT
;
5034 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5035 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5037 *op1
= GEN_INT (i
+ 1);
5038 *code
= *code
== GTU
? GEU
: LTU
;
5047 /* If that did not work, reverse the condition. */
5048 if (!op0_preserve_value
)
5050 std::swap (*op0
, *op1
);
5051 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5057 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5058 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5059 to facilitate possible combining with a cmp into 'ands'. */
5061 && GET_CODE (*op0
) == ZERO_EXTEND
5062 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5063 && GET_MODE (XEXP (*op0
, 0)) == QImode
5064 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5065 && subreg_lowpart_p (XEXP (*op0
, 0))
5066 && *op1
== const0_rtx
)
5067 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5070 /* Comparisons smaller than DImode. Only adjust comparisons against
5071 an out-of-range constant. */
5072 if (!CONST_INT_P (*op1
)
5073 || const_ok_for_arm (INTVAL (*op1
))
5074 || const_ok_for_arm (- INTVAL (*op1
)))
5088 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5090 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5091 *code
= *code
== GT
? GE
: LT
;
5099 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5101 *op1
= GEN_INT (i
- 1);
5102 *code
= *code
== GE
? GT
: LE
;
5109 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5110 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5112 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5113 *code
= *code
== GTU
? GEU
: LTU
;
5121 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5123 *op1
= GEN_INT (i
- 1);
5124 *code
= *code
== GEU
? GTU
: LEU
;
5135 /* Define how to find the value returned by a function. */
5138 arm_function_value(const_tree type
, const_tree func
,
5139 bool outgoing ATTRIBUTE_UNUSED
)
5142 int unsignedp ATTRIBUTE_UNUSED
;
5143 rtx r ATTRIBUTE_UNUSED
;
5145 mode
= TYPE_MODE (type
);
5147 if (TARGET_AAPCS_BASED
)
5148 return aapcs_allocate_return_reg (mode
, type
, func
);
5150 /* Promote integer types. */
5151 if (INTEGRAL_TYPE_P (type
))
5152 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5154 /* Promotes small structs returned in a register to full-word size
5155 for big-endian AAPCS. */
5156 if (arm_return_in_msb (type
))
5158 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5159 if (size
% UNITS_PER_WORD
!= 0)
5161 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5162 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5166 return arm_libcall_value_1 (mode
);
5169 /* libcall hashtable helpers. */
5171 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5173 static inline hashval_t
hash (const rtx_def
*);
5174 static inline bool equal (const rtx_def
*, const rtx_def
*);
5175 static inline void remove (rtx_def
*);
5179 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5181 return rtx_equal_p (p1
, p2
);
5185 libcall_hasher::hash (const rtx_def
*p1
)
5187 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5190 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5193 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5195 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5199 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5201 static bool init_done
= false;
5202 static libcall_table_type
*libcall_htab
= NULL
;
5208 libcall_htab
= new libcall_table_type (31);
5209 add_libcall (libcall_htab
,
5210 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5211 add_libcall (libcall_htab
,
5212 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5213 add_libcall (libcall_htab
,
5214 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5215 add_libcall (libcall_htab
,
5216 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5218 add_libcall (libcall_htab
,
5219 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5220 add_libcall (libcall_htab
,
5221 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5222 add_libcall (libcall_htab
,
5223 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5224 add_libcall (libcall_htab
,
5225 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5227 add_libcall (libcall_htab
,
5228 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5229 add_libcall (libcall_htab
,
5230 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5231 add_libcall (libcall_htab
,
5232 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5233 add_libcall (libcall_htab
,
5234 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5235 add_libcall (libcall_htab
,
5236 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5237 add_libcall (libcall_htab
,
5238 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5239 add_libcall (libcall_htab
,
5240 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5241 add_libcall (libcall_htab
,
5242 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5244 /* Values from double-precision helper functions are returned in core
5245 registers if the selected core only supports single-precision
5246 arithmetic, even if we are using the hard-float ABI. The same is
5247 true for single-precision helpers, but we will never be using the
5248 hard-float ABI on a CPU which doesn't support single-precision
5249 operations in hardware. */
5250 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5251 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5252 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5253 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5254 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5255 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5256 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5257 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5258 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5259 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5260 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5261 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5263 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5267 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5271 arm_libcall_value_1 (machine_mode mode
)
5273 if (TARGET_AAPCS_BASED
)
5274 return aapcs_libcall_value (mode
);
5275 else if (TARGET_IWMMXT_ABI
5276 && arm_vector_mode_supported_p (mode
))
5277 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5279 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5282 /* Define how to find the value returned by a library function
5283 assuming the value has mode MODE. */
5286 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5288 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5289 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5291 /* The following libcalls return their result in integer registers,
5292 even though they return a floating point value. */
5293 if (arm_libcall_uses_aapcs_base (libcall
))
5294 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5298 return arm_libcall_value_1 (mode
);
5301 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5304 arm_function_value_regno_p (const unsigned int regno
)
5306 if (regno
== ARG_REGISTER (1)
5308 && TARGET_AAPCS_BASED
5309 && TARGET_HARD_FLOAT
5310 && regno
== FIRST_VFP_REGNUM
)
5311 || (TARGET_IWMMXT_ABI
5312 && regno
== FIRST_IWMMXT_REGNUM
))
5318 /* Determine the amount of memory needed to store the possible return
5319 registers of an untyped call. */
5321 arm_apply_result_size (void)
5327 if (TARGET_HARD_FLOAT_ABI
)
5329 if (TARGET_IWMMXT_ABI
)
5336 /* Decide whether TYPE should be returned in memory (true)
5337 or in a register (false). FNTYPE is the type of the function making
5340 arm_return_in_memory (const_tree type
, const_tree fntype
)
5344 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5346 if (TARGET_AAPCS_BASED
)
5348 /* Simple, non-aggregate types (ie not including vectors and
5349 complex) are always returned in a register (or registers).
5350 We don't care about which register here, so we can short-cut
5351 some of the detail. */
5352 if (!AGGREGATE_TYPE_P (type
)
5353 && TREE_CODE (type
) != VECTOR_TYPE
5354 && TREE_CODE (type
) != COMPLEX_TYPE
)
5357 /* Any return value that is no larger than one word can be
5359 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5362 /* Check any available co-processors to see if they accept the
5363 type as a register candidate (VFP, for example, can return
5364 some aggregates in consecutive registers). These aren't
5365 available if the call is variadic. */
5366 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5369 /* Vector values should be returned using ARM registers, not
5370 memory (unless they're over 16 bytes, which will break since
5371 we only have four call-clobbered registers to play with). */
5372 if (TREE_CODE (type
) == VECTOR_TYPE
)
5373 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5375 /* The rest go in memory. */
5379 if (TREE_CODE (type
) == VECTOR_TYPE
)
5380 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5382 if (!AGGREGATE_TYPE_P (type
) &&
5383 (TREE_CODE (type
) != VECTOR_TYPE
))
5384 /* All simple types are returned in registers. */
5387 if (arm_abi
!= ARM_ABI_APCS
)
5389 /* ATPCS and later return aggregate types in memory only if they are
5390 larger than a word (or are variable size). */
5391 return (size
< 0 || size
> UNITS_PER_WORD
);
5394 /* For the arm-wince targets we choose to be compatible with Microsoft's
5395 ARM and Thumb compilers, which always return aggregates in memory. */
5397 /* All structures/unions bigger than one word are returned in memory.
5398 Also catch the case where int_size_in_bytes returns -1. In this case
5399 the aggregate is either huge or of variable size, and in either case
5400 we will want to return it via memory and not in a register. */
5401 if (size
< 0 || size
> UNITS_PER_WORD
)
5404 if (TREE_CODE (type
) == RECORD_TYPE
)
5408 /* For a struct the APCS says that we only return in a register
5409 if the type is 'integer like' and every addressable element
5410 has an offset of zero. For practical purposes this means
5411 that the structure can have at most one non bit-field element
5412 and that this element must be the first one in the structure. */
5414 /* Find the first field, ignoring non FIELD_DECL things which will
5415 have been created by C++. */
5416 for (field
= TYPE_FIELDS (type
);
5417 field
&& TREE_CODE (field
) != FIELD_DECL
;
5418 field
= DECL_CHAIN (field
))
5422 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5424 /* Check that the first field is valid for returning in a register. */
5426 /* ... Floats are not allowed */
5427 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5430 /* ... Aggregates that are not themselves valid for returning in
5431 a register are not allowed. */
5432 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5435 /* Now check the remaining fields, if any. Only bitfields are allowed,
5436 since they are not addressable. */
5437 for (field
= DECL_CHAIN (field
);
5439 field
= DECL_CHAIN (field
))
5441 if (TREE_CODE (field
) != FIELD_DECL
)
5444 if (!DECL_BIT_FIELD_TYPE (field
))
5451 if (TREE_CODE (type
) == UNION_TYPE
)
5455 /* Unions can be returned in registers if every element is
5456 integral, or can be returned in an integer register. */
5457 for (field
= TYPE_FIELDS (type
);
5459 field
= DECL_CHAIN (field
))
5461 if (TREE_CODE (field
) != FIELD_DECL
)
5464 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5467 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5473 #endif /* not ARM_WINCE */
5475 /* Return all other types in memory. */
5479 const struct pcs_attribute_arg
5483 } pcs_attribute_args
[] =
5485 {"aapcs", ARM_PCS_AAPCS
},
5486 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5488 /* We could recognize these, but changes would be needed elsewhere
5489 * to implement them. */
5490 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5491 {"atpcs", ARM_PCS_ATPCS
},
5492 {"apcs", ARM_PCS_APCS
},
5494 {NULL
, ARM_PCS_UNKNOWN
}
5498 arm_pcs_from_attribute (tree attr
)
5500 const struct pcs_attribute_arg
*ptr
;
5503 /* Get the value of the argument. */
5504 if (TREE_VALUE (attr
) == NULL_TREE
5505 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5506 return ARM_PCS_UNKNOWN
;
5508 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5510 /* Check it against the list of known arguments. */
5511 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5512 if (streq (arg
, ptr
->arg
))
5515 /* An unrecognized interrupt type. */
5516 return ARM_PCS_UNKNOWN
;
5519 /* Get the PCS variant to use for this call. TYPE is the function's type
5520 specification, DECL is the specific declartion. DECL may be null if
5521 the call could be indirect or if this is a library call. */
5523 arm_get_pcs_model (const_tree type
, const_tree decl
)
5525 bool user_convention
= false;
5526 enum arm_pcs user_pcs
= arm_pcs_default
;
5531 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5534 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5535 user_convention
= true;
5538 if (TARGET_AAPCS_BASED
)
5540 /* Detect varargs functions. These always use the base rules
5541 (no argument is ever a candidate for a co-processor
5543 bool base_rules
= stdarg_p (type
);
5545 if (user_convention
)
5547 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5548 sorry ("non-AAPCS derived PCS variant");
5549 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5550 error ("variadic functions must use the base AAPCS variant");
5554 return ARM_PCS_AAPCS
;
5555 else if (user_convention
)
5557 else if (decl
&& flag_unit_at_a_time
)
5559 /* Local functions never leak outside this compilation unit,
5560 so we are free to use whatever conventions are
5562 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5563 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5565 return ARM_PCS_AAPCS_LOCAL
;
5568 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5569 sorry ("PCS variant");
5571 /* For everything else we use the target's default. */
5572 return arm_pcs_default
;
5577 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5578 const_tree fntype ATTRIBUTE_UNUSED
,
5579 rtx libcall ATTRIBUTE_UNUSED
,
5580 const_tree fndecl ATTRIBUTE_UNUSED
)
5582 /* Record the unallocated VFP registers. */
5583 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5584 pcum
->aapcs_vfp_reg_alloc
= 0;
5587 /* Walk down the type tree of TYPE counting consecutive base elements.
5588 If *MODEP is VOIDmode, then set it to the first valid floating point
5589 type. If a non-floating point type is found, or if a floating point
5590 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5591 otherwise return the count in the sub-tree. */
5593 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5598 switch (TREE_CODE (type
))
5601 mode
= TYPE_MODE (type
);
5602 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5605 if (*modep
== VOIDmode
)
5614 mode
= TYPE_MODE (TREE_TYPE (type
));
5615 if (mode
!= DFmode
&& mode
!= SFmode
)
5618 if (*modep
== VOIDmode
)
5627 /* Use V2SImode and V4SImode as representatives of all 64-bit
5628 and 128-bit vector types, whether or not those modes are
5629 supported with the present options. */
5630 size
= int_size_in_bytes (type
);
5643 if (*modep
== VOIDmode
)
5646 /* Vector modes are considered to be opaque: two vectors are
5647 equivalent for the purposes of being homogeneous aggregates
5648 if they are the same size. */
5657 tree index
= TYPE_DOMAIN (type
);
5659 /* Can't handle incomplete types nor sizes that are not
5661 if (!COMPLETE_TYPE_P (type
)
5662 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5665 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5668 || !TYPE_MAX_VALUE (index
)
5669 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5670 || !TYPE_MIN_VALUE (index
)
5671 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5675 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5676 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5678 /* There must be no padding. */
5679 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5691 /* Can't handle incomplete types nor sizes that are not
5693 if (!COMPLETE_TYPE_P (type
)
5694 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5697 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5699 if (TREE_CODE (field
) != FIELD_DECL
)
5702 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5708 /* There must be no padding. */
5709 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5716 case QUAL_UNION_TYPE
:
5718 /* These aren't very interesting except in a degenerate case. */
5723 /* Can't handle incomplete types nor sizes that are not
5725 if (!COMPLETE_TYPE_P (type
)
5726 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5729 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5731 if (TREE_CODE (field
) != FIELD_DECL
)
5734 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5737 count
= count
> sub_count
? count
: sub_count
;
5740 /* There must be no padding. */
5741 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5754 /* Return true if PCS_VARIANT should use VFP registers. */
5756 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5758 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5760 static bool seen_thumb1_vfp
= false;
5762 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5764 sorry ("Thumb-1 hard-float VFP ABI");
5765 /* sorry() is not immediately fatal, so only display this once. */
5766 seen_thumb1_vfp
= true;
5772 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5775 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
5776 (TARGET_VFP_DOUBLE
|| !is_double
));
5779 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5780 suitable for passing or returning in VFP registers for the PCS
5781 variant selected. If it is, then *BASE_MODE is updated to contain
5782 a machine mode describing each element of the argument's type and
5783 *COUNT to hold the number of such elements. */
5785 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5786 machine_mode mode
, const_tree type
,
5787 machine_mode
*base_mode
, int *count
)
5789 machine_mode new_mode
= VOIDmode
;
5791 /* If we have the type information, prefer that to working things
5792 out from the mode. */
5795 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5797 if (ag_count
> 0 && ag_count
<= 4)
5802 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5803 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5804 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5809 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5812 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5818 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5821 *base_mode
= new_mode
;
5826 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5827 machine_mode mode
, const_tree type
)
5829 int count ATTRIBUTE_UNUSED
;
5830 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5832 if (!use_vfp_abi (pcs_variant
, false))
5834 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5839 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5842 if (!use_vfp_abi (pcum
->pcs_variant
, false))
5845 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
5846 &pcum
->aapcs_vfp_rmode
,
5847 &pcum
->aapcs_vfp_rcount
);
5850 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5851 for the behaviour of this function. */
5854 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
5855 const_tree type ATTRIBUTE_UNUSED
)
5858 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
5859 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
5860 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
5863 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
5864 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
5866 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
5868 || (mode
== TImode
&& ! TARGET_NEON
)
5869 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
5872 int rcount
= pcum
->aapcs_vfp_rcount
;
5874 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
5878 /* Avoid using unsupported vector modes. */
5879 if (rmode
== V2SImode
)
5881 else if (rmode
== V4SImode
)
5888 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
5889 for (i
= 0; i
< rcount
; i
++)
5891 rtx tmp
= gen_rtx_REG (rmode
,
5892 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
5893 tmp
= gen_rtx_EXPR_LIST
5895 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
5896 XVECEXP (par
, 0, i
) = tmp
;
5899 pcum
->aapcs_reg
= par
;
5902 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
5908 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
5909 comment there for the behaviour of this function. */
5912 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
5914 const_tree type ATTRIBUTE_UNUSED
)
5916 if (!use_vfp_abi (pcs_variant
, false))
5920 || (GET_MODE_CLASS (mode
) == MODE_INT
5921 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
5925 machine_mode ag_mode
;
5930 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
5935 if (ag_mode
== V2SImode
)
5937 else if (ag_mode
== V4SImode
)
5943 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
5944 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
5945 for (i
= 0; i
< count
; i
++)
5947 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
5948 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
5949 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
5950 XVECEXP (par
, 0, i
) = tmp
;
5956 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
5960 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5961 machine_mode mode ATTRIBUTE_UNUSED
,
5962 const_tree type ATTRIBUTE_UNUSED
)
5964 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
5965 pcum
->aapcs_vfp_reg_alloc
= 0;
5969 #define AAPCS_CP(X) \
5971 aapcs_ ## X ## _cum_init, \
5972 aapcs_ ## X ## _is_call_candidate, \
5973 aapcs_ ## X ## _allocate, \
5974 aapcs_ ## X ## _is_return_candidate, \
5975 aapcs_ ## X ## _allocate_return_reg, \
5976 aapcs_ ## X ## _advance \
5979 /* Table of co-processors that can be used to pass arguments in
5980 registers. Idealy no arugment should be a candidate for more than
5981 one co-processor table entry, but the table is processed in order
5982 and stops after the first match. If that entry then fails to put
5983 the argument into a co-processor register, the argument will go on
5987 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5988 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
5990 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5991 BLKmode) is a candidate for this co-processor's registers; this
5992 function should ignore any position-dependent state in
5993 CUMULATIVE_ARGS and only use call-type dependent information. */
5994 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
5996 /* Return true if the argument does get a co-processor register; it
5997 should set aapcs_reg to an RTX of the register allocated as is
5998 required for a return from FUNCTION_ARG. */
5999 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6001 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6002 be returned in this co-processor's registers. */
6003 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6005 /* Allocate and return an RTX element to hold the return type of a call. This
6006 routine must not fail and will only be called if is_return_candidate
6007 returned true with the same parameters. */
6008 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6010 /* Finish processing this argument and prepare to start processing
6012 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6013 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6021 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6026 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6027 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6034 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6036 /* We aren't passed a decl, so we can't check that a call is local.
6037 However, it isn't clear that that would be a win anyway, since it
6038 might limit some tail-calling opportunities. */
6039 enum arm_pcs pcs_variant
;
6043 const_tree fndecl
= NULL_TREE
;
6045 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6048 fntype
= TREE_TYPE (fntype
);
6051 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6054 pcs_variant
= arm_pcs_default
;
6056 if (pcs_variant
!= ARM_PCS_AAPCS
)
6060 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6061 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6070 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6073 /* We aren't passed a decl, so we can't check that a call is local.
6074 However, it isn't clear that that would be a win anyway, since it
6075 might limit some tail-calling opportunities. */
6076 enum arm_pcs pcs_variant
;
6077 int unsignedp ATTRIBUTE_UNUSED
;
6081 const_tree fndecl
= NULL_TREE
;
6083 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6086 fntype
= TREE_TYPE (fntype
);
6089 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6092 pcs_variant
= arm_pcs_default
;
6094 /* Promote integer types. */
6095 if (type
&& INTEGRAL_TYPE_P (type
))
6096 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6098 if (pcs_variant
!= ARM_PCS_AAPCS
)
6102 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6103 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6105 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6109 /* Promotes small structs returned in a register to full-word size
6110 for big-endian AAPCS. */
6111 if (type
&& arm_return_in_msb (type
))
6113 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6114 if (size
% UNITS_PER_WORD
!= 0)
6116 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6117 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
6121 return gen_rtx_REG (mode
, R0_REGNUM
);
6125 aapcs_libcall_value (machine_mode mode
)
6127 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6128 && GET_MODE_SIZE (mode
) <= 4)
6131 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6134 /* Lay out a function argument using the AAPCS rules. The rule
6135 numbers referred to here are those in the AAPCS. */
6137 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6138 const_tree type
, bool named
)
6143 /* We only need to do this once per argument. */
6144 if (pcum
->aapcs_arg_processed
)
6147 pcum
->aapcs_arg_processed
= true;
6149 /* Special case: if named is false then we are handling an incoming
6150 anonymous argument which is on the stack. */
6154 /* Is this a potential co-processor register candidate? */
6155 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6157 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6158 pcum
->aapcs_cprc_slot
= slot
;
6160 /* We don't have to apply any of the rules from part B of the
6161 preparation phase, these are handled elsewhere in the
6166 /* A Co-processor register candidate goes either in its own
6167 class of registers or on the stack. */
6168 if (!pcum
->aapcs_cprc_failed
[slot
])
6170 /* C1.cp - Try to allocate the argument to co-processor
6172 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6175 /* C2.cp - Put the argument on the stack and note that we
6176 can't assign any more candidates in this slot. We also
6177 need to note that we have allocated stack space, so that
6178 we won't later try to split a non-cprc candidate between
6179 core registers and the stack. */
6180 pcum
->aapcs_cprc_failed
[slot
] = true;
6181 pcum
->can_split
= false;
6184 /* We didn't get a register, so this argument goes on the
6186 gcc_assert (pcum
->can_split
== false);
6191 /* C3 - For double-word aligned arguments, round the NCRN up to the
6192 next even number. */
6193 ncrn
= pcum
->aapcs_ncrn
;
6194 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
6197 nregs
= ARM_NUM_REGS2(mode
, type
);
6199 /* Sigh, this test should really assert that nregs > 0, but a GCC
6200 extension allows empty structs and then gives them empty size; it
6201 then allows such a structure to be passed by value. For some of
6202 the code below we have to pretend that such an argument has
6203 non-zero size so that we 'locate' it correctly either in
6204 registers or on the stack. */
6205 gcc_assert (nregs
>= 0);
6207 nregs2
= nregs
? nregs
: 1;
6209 /* C4 - Argument fits entirely in core registers. */
6210 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6212 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6213 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6217 /* C5 - Some core registers left and there are no arguments already
6218 on the stack: split this argument between the remaining core
6219 registers and the stack. */
6220 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6222 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6223 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6224 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6228 /* C6 - NCRN is set to 4. */
6229 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6231 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6235 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6236 for a call to a function whose data type is FNTYPE.
6237 For a library call, FNTYPE is NULL. */
6239 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6241 tree fndecl ATTRIBUTE_UNUSED
)
6243 /* Long call handling. */
6245 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6247 pcum
->pcs_variant
= arm_pcs_default
;
6249 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6251 if (arm_libcall_uses_aapcs_base (libname
))
6252 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6254 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6255 pcum
->aapcs_reg
= NULL_RTX
;
6256 pcum
->aapcs_partial
= 0;
6257 pcum
->aapcs_arg_processed
= false;
6258 pcum
->aapcs_cprc_slot
= -1;
6259 pcum
->can_split
= true;
6261 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6265 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6267 pcum
->aapcs_cprc_failed
[i
] = false;
6268 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6276 /* On the ARM, the offset starts at 0. */
6278 pcum
->iwmmxt_nregs
= 0;
6279 pcum
->can_split
= true;
6281 /* Varargs vectors are treated the same as long long.
6282 named_count avoids having to change the way arm handles 'named' */
6283 pcum
->named_count
= 0;
6286 if (TARGET_REALLY_IWMMXT
&& fntype
)
6290 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6292 fn_arg
= TREE_CHAIN (fn_arg
))
6293 pcum
->named_count
+= 1;
6295 if (! pcum
->named_count
)
6296 pcum
->named_count
= INT_MAX
;
6300 /* Return true if mode/type need doubleword alignment. */
6302 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6305 return PARM_BOUNDARY
< GET_MODE_ALIGNMENT (mode
);
6307 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6308 if (!AGGREGATE_TYPE_P (type
))
6309 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6311 /* Array types: Use member alignment of element type. */
6312 if (TREE_CODE (type
) == ARRAY_TYPE
)
6313 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6315 /* Record/aggregate types: Use greatest member alignment of any member. */
6316 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6317 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6324 /* Determine where to put an argument to a function.
6325 Value is zero to push the argument on the stack,
6326 or a hard register in which to store the argument.
6328 MODE is the argument's machine mode.
6329 TYPE is the data type of the argument (as a tree).
6330 This is null for libcalls where that information may
6332 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6333 the preceding args and about the function being called.
6334 NAMED is nonzero if this argument is a named parameter
6335 (otherwise it is an extra parameter matching an ellipsis).
6337 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6338 other arguments are passed on the stack. If (NAMED == 0) (which happens
6339 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6340 defined), say it is passed in the stack (function_prologue will
6341 indeed make it pass in the stack if necessary). */
6344 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6345 const_tree type
, bool named
)
6347 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6350 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6351 a call insn (op3 of a call_value insn). */
6352 if (mode
== VOIDmode
)
6355 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6357 aapcs_layout_arg (pcum
, mode
, type
, named
);
6358 return pcum
->aapcs_reg
;
6361 /* Varargs vectors are treated the same as long long.
6362 named_count avoids having to change the way arm handles 'named' */
6363 if (TARGET_IWMMXT_ABI
6364 && arm_vector_mode_supported_p (mode
)
6365 && pcum
->named_count
> pcum
->nargs
+ 1)
6367 if (pcum
->iwmmxt_nregs
<= 9)
6368 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6371 pcum
->can_split
= false;
6376 /* Put doubleword aligned quantities in even register pairs. */
6378 && ARM_DOUBLEWORD_ALIGN
6379 && arm_needs_doubleword_align (mode
, type
))
6382 /* Only allow splitting an arg between regs and memory if all preceding
6383 args were allocated to regs. For args passed by reference we only count
6384 the reference pointer. */
6385 if (pcum
->can_split
)
6388 nregs
= ARM_NUM_REGS2 (mode
, type
);
6390 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6393 return gen_rtx_REG (mode
, pcum
->nregs
);
6397 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6399 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
6400 ? DOUBLEWORD_ALIGNMENT
6405 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6406 tree type
, bool named
)
6408 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6409 int nregs
= pcum
->nregs
;
6411 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6413 aapcs_layout_arg (pcum
, mode
, type
, named
);
6414 return pcum
->aapcs_partial
;
6417 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6420 if (NUM_ARG_REGS
> nregs
6421 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6423 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6428 /* Update the data in PCUM to advance over an argument
6429 of mode MODE and data type TYPE.
6430 (TYPE is null for libcalls where that information may not be available.) */
6433 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6434 const_tree type
, bool named
)
6436 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6438 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6440 aapcs_layout_arg (pcum
, mode
, type
, named
);
6442 if (pcum
->aapcs_cprc_slot
>= 0)
6444 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6446 pcum
->aapcs_cprc_slot
= -1;
6449 /* Generic stuff. */
6450 pcum
->aapcs_arg_processed
= false;
6451 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6452 pcum
->aapcs_reg
= NULL_RTX
;
6453 pcum
->aapcs_partial
= 0;
6458 if (arm_vector_mode_supported_p (mode
)
6459 && pcum
->named_count
> pcum
->nargs
6460 && TARGET_IWMMXT_ABI
)
6461 pcum
->iwmmxt_nregs
+= 1;
6463 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6467 /* Variable sized types are passed by reference. This is a GCC
6468 extension to the ARM ABI. */
6471 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6472 machine_mode mode ATTRIBUTE_UNUSED
,
6473 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6475 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6478 /* Encode the current state of the #pragma [no_]long_calls. */
6481 OFF
, /* No #pragma [no_]long_calls is in effect. */
6482 LONG
, /* #pragma long_calls is in effect. */
6483 SHORT
/* #pragma no_long_calls is in effect. */
6486 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6489 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6491 arm_pragma_long_calls
= LONG
;
6495 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6497 arm_pragma_long_calls
= SHORT
;
6501 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6503 arm_pragma_long_calls
= OFF
;
6506 /* Handle an attribute requiring a FUNCTION_DECL;
6507 arguments as in struct attribute_spec.handler. */
6509 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6510 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6512 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6514 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6516 *no_add_attrs
= true;
6522 /* Handle an "interrupt" or "isr" attribute;
6523 arguments as in struct attribute_spec.handler. */
6525 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6530 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6532 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6534 *no_add_attrs
= true;
6536 /* FIXME: the argument if any is checked for type attributes;
6537 should it be checked for decl ones? */
6541 if (TREE_CODE (*node
) == FUNCTION_TYPE
6542 || TREE_CODE (*node
) == METHOD_TYPE
)
6544 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6546 warning (OPT_Wattributes
, "%qE attribute ignored",
6548 *no_add_attrs
= true;
6551 else if (TREE_CODE (*node
) == POINTER_TYPE
6552 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6553 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6554 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6556 *node
= build_variant_type_copy (*node
);
6557 TREE_TYPE (*node
) = build_type_attribute_variant
6559 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6560 *no_add_attrs
= true;
6564 /* Possibly pass this attribute on from the type to a decl. */
6565 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6566 | (int) ATTR_FLAG_FUNCTION_NEXT
6567 | (int) ATTR_FLAG_ARRAY_NEXT
))
6569 *no_add_attrs
= true;
6570 return tree_cons (name
, args
, NULL_TREE
);
6574 warning (OPT_Wattributes
, "%qE attribute ignored",
6583 /* Handle a "pcs" attribute; arguments as in struct
6584 attribute_spec.handler. */
6586 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6587 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6589 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6591 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6592 *no_add_attrs
= true;
6597 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6598 /* Handle the "notshared" attribute. This attribute is another way of
6599 requesting hidden visibility. ARM's compiler supports
6600 "__declspec(notshared)"; we support the same thing via an
6604 arm_handle_notshared_attribute (tree
*node
,
6605 tree name ATTRIBUTE_UNUSED
,
6606 tree args ATTRIBUTE_UNUSED
,
6607 int flags ATTRIBUTE_UNUSED
,
6610 tree decl
= TYPE_NAME (*node
);
6614 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
6615 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
6616 *no_add_attrs
= false;
6622 /* Return 0 if the attributes for two types are incompatible, 1 if they
6623 are compatible, and 2 if they are nearly compatible (which causes a
6624 warning to be generated). */
6626 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
6630 /* Check for mismatch of non-default calling convention. */
6631 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
6634 /* Check for mismatched call attributes. */
6635 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6636 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6637 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
6638 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
6640 /* Only bother to check if an attribute is defined. */
6641 if (l1
| l2
| s1
| s2
)
6643 /* If one type has an attribute, the other must have the same attribute. */
6644 if ((l1
!= l2
) || (s1
!= s2
))
6647 /* Disallow mixed attributes. */
6648 if ((l1
& s2
) || (l2
& s1
))
6652 /* Check for mismatched ISR attribute. */
6653 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
6655 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
6656 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
6658 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
6665 /* Assigns default attributes to newly defined type. This is used to
6666 set short_call/long_call attributes for function types of
6667 functions defined inside corresponding #pragma scopes. */
6669 arm_set_default_type_attributes (tree type
)
6671 /* Add __attribute__ ((long_call)) to all functions, when
6672 inside #pragma long_calls or __attribute__ ((short_call)),
6673 when inside #pragma no_long_calls. */
6674 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
6676 tree type_attr_list
, attr_name
;
6677 type_attr_list
= TYPE_ATTRIBUTES (type
);
6679 if (arm_pragma_long_calls
== LONG
)
6680 attr_name
= get_identifier ("long_call");
6681 else if (arm_pragma_long_calls
== SHORT
)
6682 attr_name
= get_identifier ("short_call");
6686 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
6687 TYPE_ATTRIBUTES (type
) = type_attr_list
;
6691 /* Return true if DECL is known to be linked into section SECTION. */
6694 arm_function_in_section_p (tree decl
, section
*section
)
6696 /* We can only be certain about the prevailing symbol definition. */
6697 if (!decl_binds_to_current_def_p (decl
))
6700 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6701 if (!DECL_SECTION_NAME (decl
))
6703 /* Make sure that we will not create a unique section for DECL. */
6704 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
6708 return function_section (decl
) == section
;
6711 /* Return nonzero if a 32-bit "long_call" should be generated for
6712 a call from the current function to DECL. We generate a long_call
6715 a. has an __attribute__((long call))
6716 or b. is within the scope of a #pragma long_calls
6717 or c. the -mlong-calls command line switch has been specified
6719 However we do not generate a long call if the function:
6721 d. has an __attribute__ ((short_call))
6722 or e. is inside the scope of a #pragma no_long_calls
6723 or f. is defined in the same section as the current function. */
6726 arm_is_long_call_p (tree decl
)
6731 return TARGET_LONG_CALLS
;
6733 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
6734 if (lookup_attribute ("short_call", attrs
))
6737 /* For "f", be conservative, and only cater for cases in which the
6738 whole of the current function is placed in the same section. */
6739 if (!flag_reorder_blocks_and_partition
6740 && TREE_CODE (decl
) == FUNCTION_DECL
6741 && arm_function_in_section_p (decl
, current_function_section ()))
6744 if (lookup_attribute ("long_call", attrs
))
6747 return TARGET_LONG_CALLS
;
6750 /* Return nonzero if it is ok to make a tail-call to DECL. */
6752 arm_function_ok_for_sibcall (tree decl
, tree exp
)
6754 unsigned long func_type
;
6756 if (cfun
->machine
->sibcall_blocked
)
6759 /* Never tailcall something if we are generating code for Thumb-1. */
6763 /* The PIC register is live on entry to VxWorks PLT entries, so we
6764 must make the call before restoring the PIC register. */
6765 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
6768 /* If we are interworking and the function is not declared static
6769 then we can't tail-call it unless we know that it exists in this
6770 compilation unit (since it might be a Thumb routine). */
6771 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
6772 && !TREE_ASM_WRITTEN (decl
))
6775 func_type
= arm_current_func_type ();
6776 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6777 if (IS_INTERRUPT (func_type
))
6780 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
6782 /* Check that the return value locations are the same. For
6783 example that we aren't returning a value from the sibling in
6784 a VFP register but then need to transfer it to a core
6787 tree decl_or_type
= decl
;
6789 /* If it is an indirect function pointer, get the function type. */
6791 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
6793 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
6794 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
6796 if (!rtx_equal_p (a
, b
))
6800 /* Never tailcall if function may be called with a misaligned SP. */
6801 if (IS_STACKALIGN (func_type
))
6804 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6805 references should become a NOP. Don't convert such calls into
6807 if (TARGET_AAPCS_BASED
6808 && arm_abi
== ARM_ABI_AAPCS
6810 && DECL_WEAK (decl
))
6813 /* Everything else is ok. */
6818 /* Addressing mode support functions. */
6820 /* Return nonzero if X is a legitimate immediate operand when compiling
6821 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6823 legitimate_pic_operand_p (rtx x
)
6825 if (GET_CODE (x
) == SYMBOL_REF
6826 || (GET_CODE (x
) == CONST
6827 && GET_CODE (XEXP (x
, 0)) == PLUS
6828 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6834 /* Record that the current function needs a PIC register. Initialize
6835 cfun->machine->pic_reg if we have not already done so. */
6838 require_pic_register (void)
6840 /* A lot of the logic here is made obscure by the fact that this
6841 routine gets called as part of the rtx cost estimation process.
6842 We don't want those calls to affect any assumptions about the real
6843 function; and further, we can't call entry_of_function() until we
6844 start the real expansion process. */
6845 if (!crtl
->uses_pic_offset_table
)
6847 gcc_assert (can_create_pseudo_p ());
6848 if (arm_pic_register
!= INVALID_REGNUM
6849 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
6851 if (!cfun
->machine
->pic_reg
)
6852 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
6854 /* Play games to avoid marking the function as needing pic
6855 if we are being called as part of the cost-estimation
6857 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6858 crtl
->uses_pic_offset_table
= 1;
6862 rtx_insn
*seq
, *insn
;
6864 if (!cfun
->machine
->pic_reg
)
6865 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
6867 /* Play games to avoid marking the function as needing pic
6868 if we are being called as part of the cost-estimation
6870 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
6872 crtl
->uses_pic_offset_table
= 1;
6875 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
6876 && arm_pic_register
> LAST_LO_REGNUM
)
6877 emit_move_insn (cfun
->machine
->pic_reg
,
6878 gen_rtx_REG (Pmode
, arm_pic_register
));
6880 arm_load_pic_register (0UL);
6885 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
6887 INSN_LOCATION (insn
) = prologue_location
;
6889 /* We can be called during expansion of PHI nodes, where
6890 we can't yet emit instructions directly in the final
6891 insn stream. Queue the insns on the entry edge, they will
6892 be committed after everything else is expanded. */
6893 insert_insn_on_edge (seq
,
6894 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
6901 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
6903 if (GET_CODE (orig
) == SYMBOL_REF
6904 || GET_CODE (orig
) == LABEL_REF
)
6910 gcc_assert (can_create_pseudo_p ());
6911 reg
= gen_reg_rtx (Pmode
);
6914 /* VxWorks does not impose a fixed gap between segments; the run-time
6915 gap can be different from the object-file gap. We therefore can't
6916 use GOTOFF unless we are absolutely sure that the symbol is in the
6917 same segment as the GOT. Unfortunately, the flexibility of linker
6918 scripts means that we can't be sure of that in general, so assume
6919 that GOTOFF is never valid on VxWorks. */
6920 if ((GET_CODE (orig
) == LABEL_REF
6921 || (GET_CODE (orig
) == SYMBOL_REF
&&
6922 SYMBOL_REF_LOCAL_P (orig
)))
6924 && arm_pic_data_is_text_relative
)
6925 insn
= arm_pic_static_addr (orig
, reg
);
6931 /* If this function doesn't have a pic register, create one now. */
6932 require_pic_register ();
6934 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
6936 /* Make the MEM as close to a constant as possible. */
6937 mem
= SET_SRC (pat
);
6938 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
6939 MEM_READONLY_P (mem
) = 1;
6940 MEM_NOTRAP_P (mem
) = 1;
6942 insn
= emit_insn (pat
);
6945 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6947 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
6951 else if (GET_CODE (orig
) == CONST
)
6955 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6956 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
6959 /* Handle the case where we have: const (UNSPEC_TLS). */
6960 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
6961 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
6964 /* Handle the case where we have:
6965 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6967 if (GET_CODE (XEXP (orig
, 0)) == PLUS
6968 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
6969 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
6971 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
6977 gcc_assert (can_create_pseudo_p ());
6978 reg
= gen_reg_rtx (Pmode
);
6981 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
6983 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
6984 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
6985 base
== reg
? 0 : reg
);
6987 if (CONST_INT_P (offset
))
6989 /* The base register doesn't really matter, we only want to
6990 test the index for the appropriate mode. */
6991 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
6993 gcc_assert (can_create_pseudo_p ());
6994 offset
= force_reg (Pmode
, offset
);
6997 if (CONST_INT_P (offset
))
6998 return plus_constant (Pmode
, base
, INTVAL (offset
));
7001 if (GET_MODE_SIZE (mode
) > 4
7002 && (GET_MODE_CLASS (mode
) == MODE_INT
7003 || TARGET_SOFT_FLOAT
))
7005 emit_insn (gen_addsi3 (reg
, base
, offset
));
7009 return gen_rtx_PLUS (Pmode
, base
, offset
);
7016 /* Find a spare register to use during the prolog of a function. */
7019 thumb_find_work_register (unsigned long pushed_regs_mask
)
7023 /* Check the argument registers first as these are call-used. The
7024 register allocation order means that sometimes r3 might be used
7025 but earlier argument registers might not, so check them all. */
7026 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7027 if (!df_regs_ever_live_p (reg
))
7030 /* Before going on to check the call-saved registers we can try a couple
7031 more ways of deducing that r3 is available. The first is when we are
7032 pushing anonymous arguments onto the stack and we have less than 4
7033 registers worth of fixed arguments(*). In this case r3 will be part of
7034 the variable argument list and so we can be sure that it will be
7035 pushed right at the start of the function. Hence it will be available
7036 for the rest of the prologue.
7037 (*): ie crtl->args.pretend_args_size is greater than 0. */
7038 if (cfun
->machine
->uses_anonymous_args
7039 && crtl
->args
.pretend_args_size
> 0)
7040 return LAST_ARG_REGNUM
;
7042 /* The other case is when we have fixed arguments but less than 4 registers
7043 worth. In this case r3 might be used in the body of the function, but
7044 it is not being used to convey an argument into the function. In theory
7045 we could just check crtl->args.size to see how many bytes are
7046 being passed in argument registers, but it seems that it is unreliable.
7047 Sometimes it will have the value 0 when in fact arguments are being
7048 passed. (See testcase execute/20021111-1.c for an example). So we also
7049 check the args_info.nregs field as well. The problem with this field is
7050 that it makes no allowances for arguments that are passed to the
7051 function but which are not used. Hence we could miss an opportunity
7052 when a function has an unused argument in r3. But it is better to be
7053 safe than to be sorry. */
7054 if (! cfun
->machine
->uses_anonymous_args
7055 && crtl
->args
.size
>= 0
7056 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7057 && (TARGET_AAPCS_BASED
7058 ? crtl
->args
.info
.aapcs_ncrn
< 4
7059 : crtl
->args
.info
.nregs
< 4))
7060 return LAST_ARG_REGNUM
;
7062 /* Otherwise look for a call-saved register that is going to be pushed. */
7063 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7064 if (pushed_regs_mask
& (1 << reg
))
7069 /* Thumb-2 can use high regs. */
7070 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7071 if (pushed_regs_mask
& (1 << reg
))
7074 /* Something went wrong - thumb_compute_save_reg_mask()
7075 should have arranged for a suitable register to be pushed. */
7079 static GTY(()) int pic_labelno
;
7081 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7085 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7087 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7089 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7092 gcc_assert (flag_pic
);
7094 pic_reg
= cfun
->machine
->pic_reg
;
7095 if (TARGET_VXWORKS_RTP
)
7097 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7098 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7099 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7101 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7103 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7104 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7108 /* We use an UNSPEC rather than a LABEL_REF because this label
7109 never appears in the code stream. */
7111 labelno
= GEN_INT (pic_labelno
++);
7112 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7113 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7115 /* On the ARM the PC register contains 'dot + 8' at the time of the
7116 addition, on the Thumb it is 'dot + 4'. */
7117 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7118 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7120 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7124 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7126 else /* TARGET_THUMB1 */
7128 if (arm_pic_register
!= INVALID_REGNUM
7129 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7131 /* We will have pushed the pic register, so we should always be
7132 able to find a work register. */
7133 pic_tmp
= gen_rtx_REG (SImode
,
7134 thumb_find_work_register (saved_regs
));
7135 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7136 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7137 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7139 else if (arm_pic_register
!= INVALID_REGNUM
7140 && arm_pic_register
> LAST_LO_REGNUM
7141 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7143 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7144 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7145 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7148 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7152 /* Need to emit this whether or not we obey regdecls,
7153 since setjmp/longjmp can cause life info to screw up. */
7157 /* Generate code to load the address of a static var when flag_pic is set. */
7159 arm_pic_static_addr (rtx orig
, rtx reg
)
7161 rtx l1
, labelno
, offset_rtx
, insn
;
7163 gcc_assert (flag_pic
);
7165 /* We use an UNSPEC rather than a LABEL_REF because this label
7166 never appears in the code stream. */
7167 labelno
= GEN_INT (pic_labelno
++);
7168 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7169 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7171 /* On the ARM the PC register contains 'dot + 8' at the time of the
7172 addition, on the Thumb it is 'dot + 4'. */
7173 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7174 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7175 UNSPEC_SYMBOL_OFFSET
);
7176 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7178 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7182 /* Return nonzero if X is valid as an ARM state addressing register. */
7184 arm_address_register_rtx_p (rtx x
, int strict_p
)
7194 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7196 return (regno
<= LAST_ARM_REGNUM
7197 || regno
>= FIRST_PSEUDO_REGISTER
7198 || regno
== FRAME_POINTER_REGNUM
7199 || regno
== ARG_POINTER_REGNUM
);
7202 /* Return TRUE if this rtx is the difference of a symbol and a label,
7203 and will reduce to a PC-relative relocation in the object file.
7204 Expressions like this can be left alone when generating PIC, rather
7205 than forced through the GOT. */
7207 pcrel_constant_p (rtx x
)
7209 if (GET_CODE (x
) == MINUS
)
7210 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7215 /* Return true if X will surely end up in an index register after next
7218 will_be_in_index_register (const_rtx x
)
7220 /* arm.md: calculate_pic_address will split this into a register. */
7221 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7224 /* Return nonzero if X is a valid ARM state address operand. */
7226 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7230 enum rtx_code code
= GET_CODE (x
);
7232 if (arm_address_register_rtx_p (x
, strict_p
))
7235 use_ldrd
= (TARGET_LDRD
7236 && (mode
== DImode
|| mode
== DFmode
));
7238 if (code
== POST_INC
|| code
== PRE_DEC
7239 || ((code
== PRE_INC
|| code
== POST_DEC
)
7240 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7241 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7243 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7244 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7245 && GET_CODE (XEXP (x
, 1)) == PLUS
7246 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7248 rtx addend
= XEXP (XEXP (x
, 1), 1);
7250 /* Don't allow ldrd post increment by register because it's hard
7251 to fixup invalid register choices. */
7253 && GET_CODE (x
) == POST_MODIFY
7257 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7258 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7261 /* After reload constants split into minipools will have addresses
7262 from a LABEL_REF. */
7263 else if (reload_completed
7264 && (code
== LABEL_REF
7266 && GET_CODE (XEXP (x
, 0)) == PLUS
7267 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7268 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7271 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7274 else if (code
== PLUS
)
7276 rtx xop0
= XEXP (x
, 0);
7277 rtx xop1
= XEXP (x
, 1);
7279 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7280 && ((CONST_INT_P (xop1
)
7281 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7282 || (!strict_p
&& will_be_in_index_register (xop1
))))
7283 || (arm_address_register_rtx_p (xop1
, strict_p
)
7284 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7288 /* Reload currently can't handle MINUS, so disable this for now */
7289 else if (GET_CODE (x
) == MINUS
)
7291 rtx xop0
= XEXP (x
, 0);
7292 rtx xop1
= XEXP (x
, 1);
7294 return (arm_address_register_rtx_p (xop0
, strict_p
)
7295 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7299 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7300 && code
== SYMBOL_REF
7301 && CONSTANT_POOL_ADDRESS_P (x
)
7303 && symbol_mentioned_p (get_pool_constant (x
))
7304 && ! pcrel_constant_p (get_pool_constant (x
))))
7310 /* Return nonzero if X is a valid Thumb-2 address operand. */
7312 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7315 enum rtx_code code
= GET_CODE (x
);
7317 if (arm_address_register_rtx_p (x
, strict_p
))
7320 use_ldrd
= (TARGET_LDRD
7321 && (mode
== DImode
|| mode
== DFmode
));
7323 if (code
== POST_INC
|| code
== PRE_DEC
7324 || ((code
== PRE_INC
|| code
== POST_DEC
)
7325 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7326 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7328 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7329 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7330 && GET_CODE (XEXP (x
, 1)) == PLUS
7331 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7333 /* Thumb-2 only has autoincrement by constant. */
7334 rtx addend
= XEXP (XEXP (x
, 1), 1);
7335 HOST_WIDE_INT offset
;
7337 if (!CONST_INT_P (addend
))
7340 offset
= INTVAL(addend
);
7341 if (GET_MODE_SIZE (mode
) <= 4)
7342 return (offset
> -256 && offset
< 256);
7344 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7345 && (offset
& 3) == 0);
7348 /* After reload constants split into minipools will have addresses
7349 from a LABEL_REF. */
7350 else if (reload_completed
7351 && (code
== LABEL_REF
7353 && GET_CODE (XEXP (x
, 0)) == PLUS
7354 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7355 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7358 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7361 else if (code
== PLUS
)
7363 rtx xop0
= XEXP (x
, 0);
7364 rtx xop1
= XEXP (x
, 1);
7366 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7367 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7368 || (!strict_p
&& will_be_in_index_register (xop1
))))
7369 || (arm_address_register_rtx_p (xop1
, strict_p
)
7370 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7373 /* Normally we can assign constant values to target registers without
7374 the help of constant pool. But there are cases we have to use constant
7376 1) assign a label to register.
7377 2) sign-extend a 8bit value to 32bit and then assign to register.
7379 Constant pool access in format:
7380 (set (reg r0) (mem (symbol_ref (".LC0"))))
7381 will cause the use of literal pool (later in function arm_reorg).
7382 So here we mark such format as an invalid format, then the compiler
7383 will adjust it into:
7384 (set (reg r0) (symbol_ref (".LC0")))
7385 (set (reg r0) (mem (reg r0))).
7386 No extra register is required, and (mem (reg r0)) won't cause the use
7387 of literal pools. */
7388 else if (arm_disable_literal_pool
&& code
== SYMBOL_REF
7389 && CONSTANT_POOL_ADDRESS_P (x
))
7392 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7393 && code
== SYMBOL_REF
7394 && CONSTANT_POOL_ADDRESS_P (x
)
7396 && symbol_mentioned_p (get_pool_constant (x
))
7397 && ! pcrel_constant_p (get_pool_constant (x
))))
7403 /* Return nonzero if INDEX is valid for an address index operand in
7406 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7409 HOST_WIDE_INT range
;
7410 enum rtx_code code
= GET_CODE (index
);
7412 /* Standard coprocessor addressing modes. */
7413 if (TARGET_HARD_FLOAT
7414 && (mode
== SFmode
|| mode
== DFmode
))
7415 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7416 && INTVAL (index
) > -1024
7417 && (INTVAL (index
) & 3) == 0);
7419 /* For quad modes, we restrict the constant offset to be slightly less
7420 than what the instruction format permits. We do this because for
7421 quad mode moves, we will actually decompose them into two separate
7422 double-mode reads or writes. INDEX must therefore be a valid
7423 (double-mode) offset and so should INDEX+8. */
7424 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7425 return (code
== CONST_INT
7426 && INTVAL (index
) < 1016
7427 && INTVAL (index
) > -1024
7428 && (INTVAL (index
) & 3) == 0);
7430 /* We have no such constraint on double mode offsets, so we permit the
7431 full range of the instruction format. */
7432 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7433 return (code
== CONST_INT
7434 && INTVAL (index
) < 1024
7435 && INTVAL (index
) > -1024
7436 && (INTVAL (index
) & 3) == 0);
7438 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7439 return (code
== CONST_INT
7440 && INTVAL (index
) < 1024
7441 && INTVAL (index
) > -1024
7442 && (INTVAL (index
) & 3) == 0);
7444 if (arm_address_register_rtx_p (index
, strict_p
)
7445 && (GET_MODE_SIZE (mode
) <= 4))
7448 if (mode
== DImode
|| mode
== DFmode
)
7450 if (code
== CONST_INT
)
7452 HOST_WIDE_INT val
= INTVAL (index
);
7455 return val
> -256 && val
< 256;
7457 return val
> -4096 && val
< 4092;
7460 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7463 if (GET_MODE_SIZE (mode
) <= 4
7467 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7471 rtx xiop0
= XEXP (index
, 0);
7472 rtx xiop1
= XEXP (index
, 1);
7474 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7475 && power_of_two_operand (xiop1
, SImode
))
7476 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7477 && power_of_two_operand (xiop0
, SImode
)));
7479 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7480 || code
== ASHIFT
|| code
== ROTATERT
)
7482 rtx op
= XEXP (index
, 1);
7484 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7487 && INTVAL (op
) <= 31);
7491 /* For ARM v4 we may be doing a sign-extend operation during the
7497 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7503 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
7505 return (code
== CONST_INT
7506 && INTVAL (index
) < range
7507 && INTVAL (index
) > -range
);
7510 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7511 index operand. i.e. 1, 2, 4 or 8. */
7513 thumb2_index_mul_operand (rtx op
)
7517 if (!CONST_INT_P (op
))
7521 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
7524 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7526 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
7528 enum rtx_code code
= GET_CODE (index
);
7530 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7531 /* Standard coprocessor addressing modes. */
7532 if (TARGET_HARD_FLOAT
7533 && (mode
== SFmode
|| mode
== DFmode
))
7534 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7535 /* Thumb-2 allows only > -256 index range for it's core register
7536 load/stores. Since we allow SF/DF in core registers, we have
7537 to use the intersection between -256~4096 (core) and -1024~1024
7539 && INTVAL (index
) > -256
7540 && (INTVAL (index
) & 3) == 0);
7542 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7544 /* For DImode assume values will usually live in core regs
7545 and only allow LDRD addressing modes. */
7546 if (!TARGET_LDRD
|| mode
!= DImode
)
7547 return (code
== CONST_INT
7548 && INTVAL (index
) < 1024
7549 && INTVAL (index
) > -1024
7550 && (INTVAL (index
) & 3) == 0);
7553 /* For quad modes, we restrict the constant offset to be slightly less
7554 than what the instruction format permits. We do this because for
7555 quad mode moves, we will actually decompose them into two separate
7556 double-mode reads or writes. INDEX must therefore be a valid
7557 (double-mode) offset and so should INDEX+8. */
7558 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7559 return (code
== CONST_INT
7560 && INTVAL (index
) < 1016
7561 && INTVAL (index
) > -1024
7562 && (INTVAL (index
) & 3) == 0);
7564 /* We have no such constraint on double mode offsets, so we permit the
7565 full range of the instruction format. */
7566 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7567 return (code
== CONST_INT
7568 && INTVAL (index
) < 1024
7569 && INTVAL (index
) > -1024
7570 && (INTVAL (index
) & 3) == 0);
7572 if (arm_address_register_rtx_p (index
, strict_p
)
7573 && (GET_MODE_SIZE (mode
) <= 4))
7576 if (mode
== DImode
|| mode
== DFmode
)
7578 if (code
== CONST_INT
)
7580 HOST_WIDE_INT val
= INTVAL (index
);
7581 /* ??? Can we assume ldrd for thumb2? */
7582 /* Thumb-2 ldrd only has reg+const addressing modes. */
7583 /* ldrd supports offsets of +-1020.
7584 However the ldr fallback does not. */
7585 return val
> -256 && val
< 256 && (val
& 3) == 0;
7593 rtx xiop0
= XEXP (index
, 0);
7594 rtx xiop1
= XEXP (index
, 1);
7596 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7597 && thumb2_index_mul_operand (xiop1
))
7598 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7599 && thumb2_index_mul_operand (xiop0
)));
7601 else if (code
== ASHIFT
)
7603 rtx op
= XEXP (index
, 1);
7605 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7608 && INTVAL (op
) <= 3);
7611 return (code
== CONST_INT
7612 && INTVAL (index
) < 4096
7613 && INTVAL (index
) > -256);
7616 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7618 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
7628 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
7630 return (regno
<= LAST_LO_REGNUM
7631 || regno
> LAST_VIRTUAL_REGISTER
7632 || regno
== FRAME_POINTER_REGNUM
7633 || (GET_MODE_SIZE (mode
) >= 4
7634 && (regno
== STACK_POINTER_REGNUM
7635 || regno
>= FIRST_PSEUDO_REGISTER
7636 || x
== hard_frame_pointer_rtx
7637 || x
== arg_pointer_rtx
)));
7640 /* Return nonzero if x is a legitimate index register. This is the case
7641 for any base register that can access a QImode object. */
7643 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
7645 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
7648 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7650 The AP may be eliminated to either the SP or the FP, so we use the
7651 least common denominator, e.g. SImode, and offsets from 0 to 64.
7653 ??? Verify whether the above is the right approach.
7655 ??? Also, the FP may be eliminated to the SP, so perhaps that
7656 needs special handling also.
7658 ??? Look at how the mips16 port solves this problem. It probably uses
7659 better ways to solve some of these problems.
7661 Although it is not incorrect, we don't accept QImode and HImode
7662 addresses based on the frame pointer or arg pointer until the
7663 reload pass starts. This is so that eliminating such addresses
7664 into stack based ones won't produce impossible code. */
7666 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7668 /* ??? Not clear if this is right. Experiment. */
7669 if (GET_MODE_SIZE (mode
) < 4
7670 && !(reload_in_progress
|| reload_completed
)
7671 && (reg_mentioned_p (frame_pointer_rtx
, x
)
7672 || reg_mentioned_p (arg_pointer_rtx
, x
)
7673 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
7674 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
7675 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
7676 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
7679 /* Accept any base register. SP only in SImode or larger. */
7680 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
7683 /* This is PC relative data before arm_reorg runs. */
7684 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
7685 && GET_CODE (x
) == SYMBOL_REF
7686 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
7689 /* This is PC relative data after arm_reorg runs. */
7690 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
7692 && (GET_CODE (x
) == LABEL_REF
7693 || (GET_CODE (x
) == CONST
7694 && GET_CODE (XEXP (x
, 0)) == PLUS
7695 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7696 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7699 /* Post-inc indexing only supported for SImode and larger. */
7700 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
7701 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
7704 else if (GET_CODE (x
) == PLUS
)
7706 /* REG+REG address can be any two index registers. */
7707 /* We disallow FRAME+REG addressing since we know that FRAME
7708 will be replaced with STACK, and SP relative addressing only
7709 permits SP+OFFSET. */
7710 if (GET_MODE_SIZE (mode
) <= 4
7711 && XEXP (x
, 0) != frame_pointer_rtx
7712 && XEXP (x
, 1) != frame_pointer_rtx
7713 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7714 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
7715 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
7718 /* REG+const has 5-7 bit offset for non-SP registers. */
7719 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
7720 || XEXP (x
, 0) == arg_pointer_rtx
)
7721 && CONST_INT_P (XEXP (x
, 1))
7722 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
7725 /* REG+const has 10-bit offset for SP, but only SImode and
7726 larger is supported. */
7727 /* ??? Should probably check for DI/DFmode overflow here
7728 just like GO_IF_LEGITIMATE_OFFSET does. */
7729 else if (REG_P (XEXP (x
, 0))
7730 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
7731 && GET_MODE_SIZE (mode
) >= 4
7732 && CONST_INT_P (XEXP (x
, 1))
7733 && INTVAL (XEXP (x
, 1)) >= 0
7734 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
7735 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7738 else if (REG_P (XEXP (x
, 0))
7739 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
7740 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
7741 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
7742 && REGNO (XEXP (x
, 0))
7743 <= LAST_VIRTUAL_POINTER_REGISTER
))
7744 && GET_MODE_SIZE (mode
) >= 4
7745 && CONST_INT_P (XEXP (x
, 1))
7746 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
7750 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7751 && GET_MODE_SIZE (mode
) == 4
7752 && GET_CODE (x
) == SYMBOL_REF
7753 && CONSTANT_POOL_ADDRESS_P (x
)
7755 && symbol_mentioned_p (get_pool_constant (x
))
7756 && ! pcrel_constant_p (get_pool_constant (x
))))
7762 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7763 instruction of mode MODE. */
7765 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
7767 switch (GET_MODE_SIZE (mode
))
7770 return val
>= 0 && val
< 32;
7773 return val
>= 0 && val
< 64 && (val
& 1) == 0;
7777 && (val
+ GET_MODE_SIZE (mode
)) <= 128
7783 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
7786 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
7787 else if (TARGET_THUMB2
)
7788 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
7789 else /* if (TARGET_THUMB1) */
7790 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
7793 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7795 Given an rtx X being reloaded into a reg required to be
7796 in class CLASS, return the class of reg to actually use.
7797 In general this is just CLASS, but for the Thumb core registers and
7798 immediate constants we prefer a LO_REGS class or a subset. */
7801 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
7807 if (rclass
== GENERAL_REGS
)
7814 /* Build the SYMBOL_REF for __tls_get_addr. */
7816 static GTY(()) rtx tls_get_addr_libfunc
;
7819 get_tls_get_addr (void)
7821 if (!tls_get_addr_libfunc
)
7822 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
7823 return tls_get_addr_libfunc
;
7827 arm_load_tp (rtx target
)
7830 target
= gen_reg_rtx (SImode
);
7834 /* Can return in any reg. */
7835 emit_insn (gen_load_tp_hard (target
));
7839 /* Always returned in r0. Immediately copy the result into a pseudo,
7840 otherwise other uses of r0 (e.g. setting up function arguments) may
7841 clobber the value. */
7845 emit_insn (gen_load_tp_soft ());
7847 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
7848 emit_move_insn (target
, tmp
);
7854 load_tls_operand (rtx x
, rtx reg
)
7858 if (reg
== NULL_RTX
)
7859 reg
= gen_reg_rtx (SImode
);
7861 tmp
= gen_rtx_CONST (SImode
, x
);
7863 emit_move_insn (reg
, tmp
);
7869 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
7871 rtx label
, labelno
, sum
;
7873 gcc_assert (reloc
!= TLS_DESCSEQ
);
7876 labelno
= GEN_INT (pic_labelno
++);
7877 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7878 label
= gen_rtx_CONST (VOIDmode
, label
);
7880 sum
= gen_rtx_UNSPEC (Pmode
,
7881 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
7882 GEN_INT (TARGET_ARM
? 8 : 4)),
7884 reg
= load_tls_operand (sum
, reg
);
7887 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
7889 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7891 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
7892 LCT_PURE
, /* LCT_CONST? */
7893 Pmode
, 1, reg
, Pmode
);
7895 rtx_insn
*insns
= get_insns ();
7902 arm_tls_descseq_addr (rtx x
, rtx reg
)
7904 rtx labelno
= GEN_INT (pic_labelno
++);
7905 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7906 rtx sum
= gen_rtx_UNSPEC (Pmode
,
7907 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
7908 gen_rtx_CONST (VOIDmode
, label
),
7909 GEN_INT (!TARGET_ARM
)),
7911 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
7913 emit_insn (gen_tlscall (x
, labelno
));
7915 reg
= gen_reg_rtx (SImode
);
7917 gcc_assert (REGNO (reg
) != R0_REGNUM
);
7919 emit_move_insn (reg
, reg0
);
7925 legitimize_tls_address (rtx x
, rtx reg
)
7927 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
7929 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
7933 case TLS_MODEL_GLOBAL_DYNAMIC
:
7934 if (TARGET_GNU2_TLS
)
7936 reg
= arm_tls_descseq_addr (x
, reg
);
7938 tp
= arm_load_tp (NULL_RTX
);
7940 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7944 /* Original scheme */
7945 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
7946 dest
= gen_reg_rtx (Pmode
);
7947 emit_libcall_block (insns
, dest
, ret
, x
);
7951 case TLS_MODEL_LOCAL_DYNAMIC
:
7952 if (TARGET_GNU2_TLS
)
7954 reg
= arm_tls_descseq_addr (x
, reg
);
7956 tp
= arm_load_tp (NULL_RTX
);
7958 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
7962 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
7964 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7965 share the LDM result with other LD model accesses. */
7966 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
7968 dest
= gen_reg_rtx (Pmode
);
7969 emit_libcall_block (insns
, dest
, ret
, eqv
);
7971 /* Load the addend. */
7972 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
7973 GEN_INT (TLS_LDO32
)),
7975 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
7976 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
7980 case TLS_MODEL_INITIAL_EXEC
:
7981 labelno
= GEN_INT (pic_labelno
++);
7982 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7983 label
= gen_rtx_CONST (VOIDmode
, label
);
7984 sum
= gen_rtx_UNSPEC (Pmode
,
7985 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
7986 GEN_INT (TARGET_ARM
? 8 : 4)),
7988 reg
= load_tls_operand (sum
, reg
);
7991 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
7992 else if (TARGET_THUMB2
)
7993 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
7996 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
7997 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8000 tp
= arm_load_tp (NULL_RTX
);
8002 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8004 case TLS_MODEL_LOCAL_EXEC
:
8005 tp
= arm_load_tp (NULL_RTX
);
8007 reg
= gen_rtx_UNSPEC (Pmode
,
8008 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8010 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8012 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8019 /* Try machine-dependent ways of modifying an illegitimate address
8020 to be legitimate. If we find one, return the new, valid address. */
8022 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8024 if (arm_tls_referenced_p (x
))
8028 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8030 addend
= XEXP (XEXP (x
, 0), 1);
8031 x
= XEXP (XEXP (x
, 0), 0);
8034 if (GET_CODE (x
) != SYMBOL_REF
)
8037 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8039 x
= legitimize_tls_address (x
, NULL_RTX
);
8043 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8052 /* TODO: legitimize_address for Thumb2. */
8055 return thumb_legitimize_address (x
, orig_x
, mode
);
8058 if (GET_CODE (x
) == PLUS
)
8060 rtx xop0
= XEXP (x
, 0);
8061 rtx xop1
= XEXP (x
, 1);
8063 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8064 xop0
= force_reg (SImode
, xop0
);
8066 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8067 && !symbol_mentioned_p (xop1
))
8068 xop1
= force_reg (SImode
, xop1
);
8070 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8071 && CONST_INT_P (xop1
))
8073 HOST_WIDE_INT n
, low_n
;
8077 /* VFP addressing modes actually allow greater offsets, but for
8078 now we just stick with the lowest common denominator. */
8079 if (mode
== DImode
|| mode
== DFmode
)
8091 low_n
= ((mode
) == TImode
? 0
8092 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8096 base_reg
= gen_reg_rtx (SImode
);
8097 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8098 emit_move_insn (base_reg
, val
);
8099 x
= plus_constant (Pmode
, base_reg
, low_n
);
8101 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8102 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8105 /* XXX We don't allow MINUS any more -- see comment in
8106 arm_legitimate_address_outer_p (). */
8107 else if (GET_CODE (x
) == MINUS
)
8109 rtx xop0
= XEXP (x
, 0);
8110 rtx xop1
= XEXP (x
, 1);
8112 if (CONSTANT_P (xop0
))
8113 xop0
= force_reg (SImode
, xop0
);
8115 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8116 xop1
= force_reg (SImode
, xop1
);
8118 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8119 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8122 /* Make sure to take full advantage of the pre-indexed addressing mode
8123 with absolute addresses which often allows for the base register to
8124 be factorized for multiple adjacent memory references, and it might
8125 even allows for the mini pool to be avoided entirely. */
8126 else if (CONST_INT_P (x
) && optimize
> 0)
8129 HOST_WIDE_INT mask
, base
, index
;
8132 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8133 use a 8-bit index. So let's use a 12-bit index for SImode only and
8134 hope that arm_gen_constant will enable ldrb to use more bits. */
8135 bits
= (mode
== SImode
) ? 12 : 8;
8136 mask
= (1 << bits
) - 1;
8137 base
= INTVAL (x
) & ~mask
;
8138 index
= INTVAL (x
) & mask
;
8139 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8141 /* It'll most probably be more efficient to generate the base
8142 with more bits set and use a negative index instead. */
8146 base_reg
= force_reg (SImode
, GEN_INT (base
));
8147 x
= plus_constant (Pmode
, base_reg
, index
);
8152 /* We need to find and carefully transform any SYMBOL and LABEL
8153 references; so go back to the original address expression. */
8154 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8156 if (new_x
!= orig_x
)
8164 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8165 to be legitimate. If we find one, return the new, valid address. */
8167 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8169 if (GET_CODE (x
) == PLUS
8170 && CONST_INT_P (XEXP (x
, 1))
8171 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8172 || INTVAL (XEXP (x
, 1)) < 0))
8174 rtx xop0
= XEXP (x
, 0);
8175 rtx xop1
= XEXP (x
, 1);
8176 HOST_WIDE_INT offset
= INTVAL (xop1
);
8178 /* Try and fold the offset into a biasing of the base register and
8179 then offsetting that. Don't do this when optimizing for space
8180 since it can cause too many CSEs. */
8181 if (optimize_size
&& offset
>= 0
8182 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8184 HOST_WIDE_INT delta
;
8187 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8188 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8189 delta
= 31 * GET_MODE_SIZE (mode
);
8191 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8193 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8195 x
= plus_constant (Pmode
, xop0
, delta
);
8197 else if (offset
< 0 && offset
> -256)
8198 /* Small negative offsets are best done with a subtract before the
8199 dereference, forcing these into a register normally takes two
8201 x
= force_operand (x
, NULL_RTX
);
8204 /* For the remaining cases, force the constant into a register. */
8205 xop1
= force_reg (SImode
, xop1
);
8206 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8209 else if (GET_CODE (x
) == PLUS
8210 && s_register_operand (XEXP (x
, 1), SImode
)
8211 && !s_register_operand (XEXP (x
, 0), SImode
))
8213 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8215 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8220 /* We need to find and carefully transform any SYMBOL and LABEL
8221 references; so go back to the original address expression. */
8222 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8224 if (new_x
!= orig_x
)
8231 /* Return TRUE if X contains any TLS symbol references. */
8234 arm_tls_referenced_p (rtx x
)
8236 if (! TARGET_HAVE_TLS
)
8239 subrtx_iterator::array_type array
;
8240 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8242 const_rtx x
= *iter
;
8243 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8246 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8247 TLS offsets, not real symbol references. */
8248 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8249 iter
.skip_subrtxes ();
8254 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8256 On the ARM, allow any integer (invalid ones are removed later by insn
8257 patterns), nice doubles and symbol_refs which refer to the function's
8260 When generating pic allow anything. */
8263 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8265 return flag_pic
|| !label_mentioned_p (x
);
8269 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8271 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8272 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8273 for ARMv8-M Baseline or later the result is valid. */
8274 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8277 return (CONST_INT_P (x
)
8278 || CONST_DOUBLE_P (x
)
8279 || CONSTANT_ADDRESS_P (x
)
8284 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8286 return (!arm_cannot_force_const_mem (mode
, x
)
8288 ? arm_legitimate_constant_p_1 (mode
, x
)
8289 : thumb_legitimate_constant_p (mode
, x
)));
8292 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8295 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8299 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8301 split_const (x
, &base
, &offset
);
8302 if (GET_CODE (base
) == SYMBOL_REF
8303 && !offset_within_block_p (base
, INTVAL (offset
)))
8306 return arm_tls_referenced_p (x
);
8309 #define REG_OR_SUBREG_REG(X) \
8311 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8313 #define REG_OR_SUBREG_RTX(X) \
8314 (REG_P (X) ? (X) : SUBREG_REG (X))
8317 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8319 machine_mode mode
= GET_MODE (x
);
8328 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8335 return COSTS_N_INSNS (1);
8338 if (CONST_INT_P (XEXP (x
, 1)))
8341 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8348 return COSTS_N_INSNS (2) + cycles
;
8350 return COSTS_N_INSNS (1) + 16;
8353 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8355 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8356 return (COSTS_N_INSNS (words
)
8357 + 4 * ((MEM_P (SET_SRC (x
)))
8358 + MEM_P (SET_DEST (x
))));
8363 if (UINTVAL (x
) < 256
8364 /* 16-bit constant. */
8365 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8367 if (thumb_shiftable_const (INTVAL (x
)))
8368 return COSTS_N_INSNS (2);
8369 return COSTS_N_INSNS (3);
8371 else if ((outer
== PLUS
|| outer
== COMPARE
)
8372 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8374 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8375 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8376 return COSTS_N_INSNS (1);
8377 else if (outer
== AND
)
8380 /* This duplicates the tests in the andsi3 expander. */
8381 for (i
= 9; i
<= 31; i
++)
8382 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8383 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8384 return COSTS_N_INSNS (2);
8386 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8387 || outer
== LSHIFTRT
)
8389 return COSTS_N_INSNS (2);
8395 return COSTS_N_INSNS (3);
8413 /* XXX another guess. */
8414 /* Memory costs quite a lot for the first word, but subsequent words
8415 load at the equivalent of a single insn each. */
8416 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8417 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8422 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8428 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8429 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8435 return total
+ COSTS_N_INSNS (1);
8437 /* Assume a two-shift sequence. Increase the cost slightly so
8438 we prefer actual shifts over an extend operation. */
8439 return total
+ 1 + COSTS_N_INSNS (2);
8446 /* Estimates the size cost of thumb1 instructions.
8447 For now most of the code is copied from thumb1_rtx_costs. We need more
8448 fine grain tuning when we have more related test cases. */
8450 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8452 machine_mode mode
= GET_MODE (x
);
8461 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8465 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8466 defined by RTL expansion, especially for the expansion of
8468 if ((GET_CODE (XEXP (x
, 0)) == MULT
8469 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8470 || (GET_CODE (XEXP (x
, 1)) == MULT
8471 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8472 return COSTS_N_INSNS (2);
8477 return COSTS_N_INSNS (1);
8480 if (CONST_INT_P (XEXP (x
, 1)))
8482 /* Thumb1 mul instruction can't operate on const. We must Load it
8483 into a register first. */
8484 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8485 /* For the targets which have a very small and high-latency multiply
8486 unit, we prefer to synthesize the mult with up to 5 instructions,
8487 giving a good balance between size and performance. */
8488 if (arm_arch6m
&& arm_m_profile_small_mul
)
8489 return COSTS_N_INSNS (5);
8491 return COSTS_N_INSNS (1) + const_size
;
8493 return COSTS_N_INSNS (1);
8496 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8498 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8499 cost
= COSTS_N_INSNS (words
);
8500 if (satisfies_constraint_J (SET_SRC (x
))
8501 || satisfies_constraint_K (SET_SRC (x
))
8502 /* Too big an immediate for a 2-byte mov, using MOVT. */
8503 || (CONST_INT_P (SET_SRC (x
))
8504 && UINTVAL (SET_SRC (x
)) >= 256
8506 && satisfies_constraint_j (SET_SRC (x
)))
8507 /* thumb1_movdi_insn. */
8508 || ((words
> 1) && MEM_P (SET_SRC (x
))))
8509 cost
+= COSTS_N_INSNS (1);
8515 if (UINTVAL (x
) < 256)
8516 return COSTS_N_INSNS (1);
8517 /* movw is 4byte long. */
8518 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
8519 return COSTS_N_INSNS (2);
8520 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8521 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
8522 return COSTS_N_INSNS (2);
8523 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8524 if (thumb_shiftable_const (INTVAL (x
)))
8525 return COSTS_N_INSNS (2);
8526 return COSTS_N_INSNS (3);
8528 else if ((outer
== PLUS
|| outer
== COMPARE
)
8529 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8531 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8532 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8533 return COSTS_N_INSNS (1);
8534 else if (outer
== AND
)
8537 /* This duplicates the tests in the andsi3 expander. */
8538 for (i
= 9; i
<= 31; i
++)
8539 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8540 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8541 return COSTS_N_INSNS (2);
8543 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8544 || outer
== LSHIFTRT
)
8546 return COSTS_N_INSNS (2);
8552 return COSTS_N_INSNS (3);
8566 return COSTS_N_INSNS (1);
8569 return (COSTS_N_INSNS (1)
8571 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8572 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8573 ? COSTS_N_INSNS (1) : 0));
8577 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8582 /* XXX still guessing. */
8583 switch (GET_MODE (XEXP (x
, 0)))
8586 return (1 + (mode
== DImode
? 4 : 0)
8587 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8590 return (4 + (mode
== DImode
? 4 : 0)
8591 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8594 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
8605 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8606 operand, then return the operand that is being shifted. If the shift
8607 is not by a constant, then set SHIFT_REG to point to the operand.
8608 Return NULL if OP is not a shifter operand. */
8610 shifter_op_p (rtx op
, rtx
*shift_reg
)
8612 enum rtx_code code
= GET_CODE (op
);
8614 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
8615 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
8616 return XEXP (op
, 0);
8617 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
8618 return XEXP (op
, 0);
8619 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
8620 || code
== ASHIFTRT
)
8622 if (!CONST_INT_P (XEXP (op
, 1)))
8623 *shift_reg
= XEXP (op
, 1);
8624 return XEXP (op
, 0);
8631 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
8633 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
8634 rtx_code code
= GET_CODE (x
);
8635 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
8637 switch (XINT (x
, 1))
8639 case UNSPEC_UNALIGNED_LOAD
:
8640 /* We can only do unaligned loads into the integer unit, and we can't
8642 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
8644 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
8645 + extra_cost
->ldst
.load_unaligned
);
8648 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
8649 ADDR_SPACE_GENERIC
, speed_p
);
8653 case UNSPEC_UNALIGNED_STORE
:
8654 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
8656 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
8657 + extra_cost
->ldst
.store_unaligned
);
8659 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
8661 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
8662 ADDR_SPACE_GENERIC
, speed_p
);
8673 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
8677 *cost
= COSTS_N_INSNS (2);
8683 /* Cost of a libcall. We assume one insn per argument, an amount for the
8684 call (one insn for -Os) and then one for processing the result. */
8685 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
8687 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
8690 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
8691 if (shift_op != NULL \
8692 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
8697 *cost += extra_cost->alu.arith_shift_reg; \
8698 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
8699 ASHIFT, 1, speed_p); \
8702 *cost += extra_cost->alu.arith_shift; \
8704 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
8705 ASHIFT, 0, speed_p) \
8706 + rtx_cost (XEXP (x, 1 - IDX), \
8707 GET_MODE (shift_op), \
8714 /* RTX costs. Make an estimate of the cost of executing the operation
8715 X, which is contained with an operation with code OUTER_CODE.
8716 SPEED_P indicates whether the cost desired is the performance cost,
8717 or the size cost. The estimate is stored in COST and the return
8718 value is TRUE if the cost calculation is final, or FALSE if the
8719 caller should recurse through the operands of X to add additional
8722 We currently make no attempt to model the size savings of Thumb-2
8723 16-bit instructions. At the normal points in compilation where
8724 this code is called we have no measure of whether the condition
8725 flags are live or not, and thus no realistic way to determine what
8726 the size will eventually be. */
8728 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8729 const struct cpu_cost_table
*extra_cost
,
8730 int *cost
, bool speed_p
)
8732 machine_mode mode
= GET_MODE (x
);
8734 *cost
= COSTS_N_INSNS (1);
8739 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
8741 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
8749 /* SET RTXs don't have a mode so we get it from the destination. */
8750 mode
= GET_MODE (SET_DEST (x
));
8752 if (REG_P (SET_SRC (x
))
8753 && REG_P (SET_DEST (x
)))
8755 /* Assume that most copies can be done with a single insn,
8756 unless we don't have HW FP, in which case everything
8757 larger than word mode will require two insns. */
8758 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
8759 && GET_MODE_SIZE (mode
) > 4)
8762 /* Conditional register moves can be encoded
8763 in 16 bits in Thumb mode. */
8764 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
8770 if (CONST_INT_P (SET_SRC (x
)))
8772 /* Handle CONST_INT here, since the value doesn't have a mode
8773 and we would otherwise be unable to work out the true cost. */
8774 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
8777 /* Slightly lower the cost of setting a core reg to a constant.
8778 This helps break up chains and allows for better scheduling. */
8779 if (REG_P (SET_DEST (x
))
8780 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
8783 /* Immediate moves with an immediate in the range [0, 255] can be
8784 encoded in 16 bits in Thumb mode. */
8785 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
8786 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
8788 goto const_int_cost
;
8794 /* A memory access costs 1 insn if the mode is small, or the address is
8795 a single register, otherwise it costs one insn per word. */
8796 if (REG_P (XEXP (x
, 0)))
8797 *cost
= COSTS_N_INSNS (1);
8799 && GET_CODE (XEXP (x
, 0)) == PLUS
8800 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
8801 /* This will be split into two instructions.
8802 See arm.md:calculate_pic_address. */
8803 *cost
= COSTS_N_INSNS (2);
8805 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
8807 /* For speed optimizations, add the costs of the address and
8808 accessing memory. */
8811 *cost
+= (extra_cost
->ldst
.load
8812 + arm_address_cost (XEXP (x
, 0), mode
,
8813 ADDR_SPACE_GENERIC
, speed_p
));
8815 *cost
+= extra_cost
->ldst
.load
;
8821 /* Calculations of LDM costs are complex. We assume an initial cost
8822 (ldm_1st) which will load the number of registers mentioned in
8823 ldm_regs_per_insn_1st registers; then each additional
8824 ldm_regs_per_insn_subsequent registers cost one more insn. The
8825 formula for N regs is thus:
8827 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
8828 + ldm_regs_per_insn_subsequent - 1)
8829 / ldm_regs_per_insn_subsequent).
8831 Additional costs may also be added for addressing. A similar
8832 formula is used for STM. */
8834 bool is_ldm
= load_multiple_operation (x
, SImode
);
8835 bool is_stm
= store_multiple_operation (x
, SImode
);
8837 if (is_ldm
|| is_stm
)
8841 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
8842 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
8843 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
8844 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
8845 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
8846 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
8847 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
8849 *cost
+= regs_per_insn_1st
8850 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
8851 + regs_per_insn_sub
- 1)
8852 / regs_per_insn_sub
);
8861 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
8862 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
8863 *cost
+= COSTS_N_INSNS (speed_p
8864 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
8865 else if (mode
== SImode
&& TARGET_IDIV
)
8866 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
8868 *cost
= LIBCALL_COST (2);
8869 return false; /* All arguments must be in registers. */
8872 /* MOD by a power of 2 can be expanded as:
8874 and r0, r0, #(n - 1)
8875 and r1, r1, #(n - 1)
8876 rsbpl r0, r1, #0. */
8877 if (CONST_INT_P (XEXP (x
, 1))
8878 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
8881 *cost
+= COSTS_N_INSNS (3);
8884 *cost
+= 2 * extra_cost
->alu
.logical
8885 + extra_cost
->alu
.arith
;
8891 *cost
= LIBCALL_COST (2);
8892 return false; /* All arguments must be in registers. */
8895 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
8897 *cost
+= (COSTS_N_INSNS (1)
8898 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
8900 *cost
+= extra_cost
->alu
.shift_reg
;
8908 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
8910 *cost
+= (COSTS_N_INSNS (2)
8911 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
8913 *cost
+= 2 * extra_cost
->alu
.shift
;
8916 else if (mode
== SImode
)
8918 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
8919 /* Slightly disparage register shifts at -Os, but not by much. */
8920 if (!CONST_INT_P (XEXP (x
, 1)))
8921 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
8922 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
8925 else if (GET_MODE_CLASS (mode
) == MODE_INT
8926 && GET_MODE_SIZE (mode
) < 4)
8930 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
8931 /* Slightly disparage register shifts at -Os, but not by
8933 if (!CONST_INT_P (XEXP (x
, 1)))
8934 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
8935 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
8937 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
8939 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
8941 /* Can use SBFX/UBFX. */
8943 *cost
+= extra_cost
->alu
.bfx
;
8944 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
8948 *cost
+= COSTS_N_INSNS (1);
8949 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
8952 if (CONST_INT_P (XEXP (x
, 1)))
8953 *cost
+= 2 * extra_cost
->alu
.shift
;
8955 *cost
+= (extra_cost
->alu
.shift
8956 + extra_cost
->alu
.shift_reg
);
8959 /* Slightly disparage register shifts. */
8960 *cost
+= !CONST_INT_P (XEXP (x
, 1));
8965 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
8966 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
8969 if (CONST_INT_P (XEXP (x
, 1)))
8970 *cost
+= (2 * extra_cost
->alu
.shift
8971 + extra_cost
->alu
.log_shift
);
8973 *cost
+= (extra_cost
->alu
.shift
8974 + extra_cost
->alu
.shift_reg
8975 + extra_cost
->alu
.log_shift_reg
);
8981 *cost
= LIBCALL_COST (2);
8990 *cost
+= extra_cost
->alu
.rev
;
8997 /* No rev instruction available. Look at arm_legacy_rev
8998 and thumb_legacy_rev for the form of RTL used then. */
9001 *cost
+= COSTS_N_INSNS (9);
9005 *cost
+= 6 * extra_cost
->alu
.shift
;
9006 *cost
+= 3 * extra_cost
->alu
.logical
;
9011 *cost
+= COSTS_N_INSNS (4);
9015 *cost
+= 2 * extra_cost
->alu
.shift
;
9016 *cost
+= extra_cost
->alu
.arith_shift
;
9017 *cost
+= 2 * extra_cost
->alu
.logical
;
9025 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9026 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9028 if (GET_CODE (XEXP (x
, 0)) == MULT
9029 || GET_CODE (XEXP (x
, 1)) == MULT
)
9031 rtx mul_op0
, mul_op1
, sub_op
;
9034 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9036 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9038 mul_op0
= XEXP (XEXP (x
, 0), 0);
9039 mul_op1
= XEXP (XEXP (x
, 0), 1);
9040 sub_op
= XEXP (x
, 1);
9044 mul_op0
= XEXP (XEXP (x
, 1), 0);
9045 mul_op1
= XEXP (XEXP (x
, 1), 1);
9046 sub_op
= XEXP (x
, 0);
9049 /* The first operand of the multiply may be optionally
9051 if (GET_CODE (mul_op0
) == NEG
)
9052 mul_op0
= XEXP (mul_op0
, 0);
9054 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9055 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9056 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9062 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9068 rtx shift_by_reg
= NULL
;
9072 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9073 if (shift_op
== NULL
)
9075 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9076 non_shift_op
= XEXP (x
, 0);
9079 non_shift_op
= XEXP (x
, 1);
9081 if (shift_op
!= NULL
)
9083 if (shift_by_reg
!= NULL
)
9086 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9087 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9090 *cost
+= extra_cost
->alu
.arith_shift
;
9092 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9093 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9098 && GET_CODE (XEXP (x
, 1)) == MULT
)
9102 *cost
+= extra_cost
->mult
[0].add
;
9103 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9104 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9105 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9109 if (CONST_INT_P (XEXP (x
, 0)))
9111 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9112 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9114 *cost
= COSTS_N_INSNS (insns
);
9116 *cost
+= insns
* extra_cost
->alu
.arith
;
9117 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9121 *cost
+= extra_cost
->alu
.arith
;
9126 if (GET_MODE_CLASS (mode
) == MODE_INT
9127 && GET_MODE_SIZE (mode
) < 4)
9129 rtx shift_op
, shift_reg
;
9132 /* We check both sides of the MINUS for shifter operands since,
9133 unlike PLUS, it's not commutative. */
9135 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9136 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9138 /* Slightly disparage, as we might need to widen the result. */
9141 *cost
+= extra_cost
->alu
.arith
;
9143 if (CONST_INT_P (XEXP (x
, 0)))
9145 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9154 *cost
+= COSTS_N_INSNS (1);
9156 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9158 rtx op1
= XEXP (x
, 1);
9161 *cost
+= 2 * extra_cost
->alu
.arith
;
9163 if (GET_CODE (op1
) == ZERO_EXTEND
)
9164 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9167 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9168 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9172 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9175 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9176 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9178 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9181 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9182 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9185 *cost
+= (extra_cost
->alu
.arith
9186 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9187 ? extra_cost
->alu
.arith
9188 : extra_cost
->alu
.arith_shift
));
9189 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9190 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9191 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9196 *cost
+= 2 * extra_cost
->alu
.arith
;
9202 *cost
= LIBCALL_COST (2);
9206 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9207 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9209 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9211 rtx mul_op0
, mul_op1
, add_op
;
9214 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9216 mul_op0
= XEXP (XEXP (x
, 0), 0);
9217 mul_op1
= XEXP (XEXP (x
, 0), 1);
9218 add_op
= XEXP (x
, 1);
9220 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9221 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9222 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9228 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9231 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9233 *cost
= LIBCALL_COST (2);
9237 /* Narrow modes can be synthesized in SImode, but the range
9238 of useful sub-operations is limited. Check for shift operations
9239 on one of the operands. Only left shifts can be used in the
9241 if (GET_MODE_CLASS (mode
) == MODE_INT
9242 && GET_MODE_SIZE (mode
) < 4)
9244 rtx shift_op
, shift_reg
;
9247 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9249 if (CONST_INT_P (XEXP (x
, 1)))
9251 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9252 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9254 *cost
= COSTS_N_INSNS (insns
);
9256 *cost
+= insns
* extra_cost
->alu
.arith
;
9257 /* Slightly penalize a narrow operation as the result may
9259 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9263 /* Slightly penalize a narrow operation as the result may
9267 *cost
+= extra_cost
->alu
.arith
;
9274 rtx shift_op
, shift_reg
;
9277 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9278 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9280 /* UXTA[BH] or SXTA[BH]. */
9282 *cost
+= extra_cost
->alu
.extend_arith
;
9283 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9285 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9290 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9291 if (shift_op
!= NULL
)
9296 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9297 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9300 *cost
+= extra_cost
->alu
.arith_shift
;
9302 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9303 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9306 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9308 rtx mul_op
= XEXP (x
, 0);
9310 if (TARGET_DSP_MULTIPLY
9311 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9312 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9313 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9314 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9315 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9316 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9317 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9318 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9319 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9320 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9321 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9322 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9327 *cost
+= extra_cost
->mult
[0].extend_add
;
9328 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9329 SIGN_EXTEND
, 0, speed_p
)
9330 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9331 SIGN_EXTEND
, 0, speed_p
)
9332 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9337 *cost
+= extra_cost
->mult
[0].add
;
9338 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9339 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9340 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9343 if (CONST_INT_P (XEXP (x
, 1)))
9345 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9346 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9348 *cost
= COSTS_N_INSNS (insns
);
9350 *cost
+= insns
* extra_cost
->alu
.arith
;
9351 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9355 *cost
+= extra_cost
->alu
.arith
;
9363 && GET_CODE (XEXP (x
, 0)) == MULT
9364 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9365 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9366 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9367 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9370 *cost
+= extra_cost
->mult
[1].extend_add
;
9371 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
9372 ZERO_EXTEND
, 0, speed_p
)
9373 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
9374 ZERO_EXTEND
, 0, speed_p
)
9375 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9379 *cost
+= COSTS_N_INSNS (1);
9381 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9382 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9385 *cost
+= (extra_cost
->alu
.arith
9386 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9387 ? extra_cost
->alu
.arith
9388 : extra_cost
->alu
.arith_shift
));
9390 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9392 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9397 *cost
+= 2 * extra_cost
->alu
.arith
;
9402 *cost
= LIBCALL_COST (2);
9405 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9408 *cost
+= extra_cost
->alu
.rev
;
9416 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9417 rtx op0
= XEXP (x
, 0);
9418 rtx shift_op
, shift_reg
;
9422 || (code
== IOR
&& TARGET_THUMB2
)))
9423 op0
= XEXP (op0
, 0);
9426 shift_op
= shifter_op_p (op0
, &shift_reg
);
9427 if (shift_op
!= NULL
)
9432 *cost
+= extra_cost
->alu
.log_shift_reg
;
9433 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9436 *cost
+= extra_cost
->alu
.log_shift
;
9438 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9439 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9443 if (CONST_INT_P (XEXP (x
, 1)))
9445 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9446 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9449 *cost
= COSTS_N_INSNS (insns
);
9451 *cost
+= insns
* extra_cost
->alu
.logical
;
9452 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
9457 *cost
+= extra_cost
->alu
.logical
;
9458 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
9459 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9465 rtx op0
= XEXP (x
, 0);
9466 enum rtx_code subcode
= GET_CODE (op0
);
9468 *cost
+= COSTS_N_INSNS (1);
9472 || (code
== IOR
&& TARGET_THUMB2
)))
9473 op0
= XEXP (op0
, 0);
9475 if (GET_CODE (op0
) == ZERO_EXTEND
)
9478 *cost
+= 2 * extra_cost
->alu
.logical
;
9480 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
9482 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9485 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9488 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9490 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
9492 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9497 *cost
+= 2 * extra_cost
->alu
.logical
;
9503 *cost
= LIBCALL_COST (2);
9507 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9508 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9510 rtx op0
= XEXP (x
, 0);
9512 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
9513 op0
= XEXP (op0
, 0);
9516 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9518 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
9519 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
9522 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9524 *cost
= LIBCALL_COST (2);
9530 if (TARGET_DSP_MULTIPLY
9531 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9532 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9533 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9534 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9535 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9536 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
9537 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
9538 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
9539 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9540 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9541 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9542 && (INTVAL (XEXP (XEXP (x
, 1), 1))
9547 *cost
+= extra_cost
->mult
[0].extend
;
9548 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
9549 SIGN_EXTEND
, 0, speed_p
);
9550 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
9551 SIGN_EXTEND
, 1, speed_p
);
9555 *cost
+= extra_cost
->mult
[0].simple
;
9562 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9563 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
9564 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9565 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
9568 *cost
+= extra_cost
->mult
[1].extend
;
9569 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
9570 ZERO_EXTEND
, 0, speed_p
)
9571 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9572 ZERO_EXTEND
, 0, speed_p
));
9576 *cost
= LIBCALL_COST (2);
9581 *cost
= LIBCALL_COST (2);
9585 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9586 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9588 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9591 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
9596 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9600 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9602 *cost
= LIBCALL_COST (1);
9608 if (GET_CODE (XEXP (x
, 0)) == ABS
)
9610 *cost
+= COSTS_N_INSNS (1);
9611 /* Assume the non-flag-changing variant. */
9613 *cost
+= (extra_cost
->alu
.log_shift
9614 + extra_cost
->alu
.arith_shift
);
9615 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
9619 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
9620 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
9622 *cost
+= COSTS_N_INSNS (1);
9623 /* No extra cost for MOV imm and MVN imm. */
9624 /* If the comparison op is using the flags, there's no further
9625 cost, otherwise we need to add the cost of the comparison. */
9626 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
9627 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
9628 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
9630 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
9631 *cost
+= (COSTS_N_INSNS (1)
9632 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
9634 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
9637 *cost
+= extra_cost
->alu
.arith
;
9643 *cost
+= extra_cost
->alu
.arith
;
9647 if (GET_MODE_CLASS (mode
) == MODE_INT
9648 && GET_MODE_SIZE (mode
) < 4)
9650 /* Slightly disparage, as we might need an extend operation. */
9653 *cost
+= extra_cost
->alu
.arith
;
9659 *cost
+= COSTS_N_INSNS (1);
9661 *cost
+= 2 * extra_cost
->alu
.arith
;
9666 *cost
= LIBCALL_COST (1);
9673 rtx shift_reg
= NULL
;
9675 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9679 if (shift_reg
!= NULL
)
9682 *cost
+= extra_cost
->alu
.log_shift_reg
;
9683 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9686 *cost
+= extra_cost
->alu
.log_shift
;
9687 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
9692 *cost
+= extra_cost
->alu
.logical
;
9697 *cost
+= COSTS_N_INSNS (1);
9703 *cost
+= LIBCALL_COST (1);
9708 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9710 *cost
+= COSTS_N_INSNS (3);
9713 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
9714 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
9716 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
9717 /* Assume that if one arm of the if_then_else is a register,
9718 that it will be tied with the result and eliminate the
9719 conditional insn. */
9720 if (REG_P (XEXP (x
, 1)))
9722 else if (REG_P (XEXP (x
, 2)))
9728 if (extra_cost
->alu
.non_exec_costs_exec
)
9729 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
9731 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
9734 *cost
+= op1cost
+ op2cost
;
9740 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
9744 machine_mode op0mode
;
9745 /* We'll mostly assume that the cost of a compare is the cost of the
9746 LHS. However, there are some notable exceptions. */
9748 /* Floating point compares are never done as side-effects. */
9749 op0mode
= GET_MODE (XEXP (x
, 0));
9750 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
9751 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9754 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
9756 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
9758 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
9764 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
9766 *cost
= LIBCALL_COST (2);
9770 /* DImode compares normally take two insns. */
9771 if (op0mode
== DImode
)
9773 *cost
+= COSTS_N_INSNS (1);
9775 *cost
+= 2 * extra_cost
->alu
.arith
;
9779 if (op0mode
== SImode
)
9784 if (XEXP (x
, 1) == const0_rtx
9785 && !(REG_P (XEXP (x
, 0))
9786 || (GET_CODE (XEXP (x
, 0)) == SUBREG
9787 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
9789 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
9791 /* Multiply operations that set the flags are often
9792 significantly more expensive. */
9794 && GET_CODE (XEXP (x
, 0)) == MULT
9795 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
9796 *cost
+= extra_cost
->mult
[0].flag_setting
;
9799 && GET_CODE (XEXP (x
, 0)) == PLUS
9800 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
9801 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
9803 *cost
+= extra_cost
->mult
[0].flag_setting
;
9808 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9809 if (shift_op
!= NULL
)
9811 if (shift_reg
!= NULL
)
9813 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
9816 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9819 *cost
+= extra_cost
->alu
.arith_shift
;
9820 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
9821 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
9826 *cost
+= extra_cost
->alu
.arith
;
9827 if (CONST_INT_P (XEXP (x
, 1))
9828 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
9830 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
9838 *cost
= LIBCALL_COST (2);
9861 if (outer_code
== SET
)
9863 /* Is it a store-flag operation? */
9864 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
9865 && XEXP (x
, 1) == const0_rtx
)
9867 /* Thumb also needs an IT insn. */
9868 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
9871 if (XEXP (x
, 1) == const0_rtx
)
9876 /* LSR Rd, Rn, #31. */
9878 *cost
+= extra_cost
->alu
.shift
;
9888 *cost
+= COSTS_N_INSNS (1);
9892 /* RSBS T1, Rn, Rn, LSR #31
9894 *cost
+= COSTS_N_INSNS (1);
9896 *cost
+= extra_cost
->alu
.arith_shift
;
9900 /* RSB Rd, Rn, Rn, ASR #1
9902 *cost
+= COSTS_N_INSNS (1);
9904 *cost
+= (extra_cost
->alu
.arith_shift
9905 + extra_cost
->alu
.shift
);
9911 *cost
+= COSTS_N_INSNS (1);
9913 *cost
+= extra_cost
->alu
.shift
;
9917 /* Remaining cases are either meaningless or would take
9918 three insns anyway. */
9919 *cost
= COSTS_N_INSNS (3);
9922 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9927 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
9928 if (CONST_INT_P (XEXP (x
, 1))
9929 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
9931 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9938 /* Not directly inside a set. If it involves the condition code
9939 register it must be the condition for a branch, cond_exec or
9940 I_T_E operation. Since the comparison is performed elsewhere
9941 this is just the control part which has no additional
9943 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
9944 && XEXP (x
, 1) == const0_rtx
)
9952 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9953 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9956 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
9960 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9962 *cost
= LIBCALL_COST (1);
9969 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
9973 *cost
= LIBCALL_COST (1);
9977 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
9978 && MEM_P (XEXP (x
, 0)))
9981 *cost
+= COSTS_N_INSNS (1);
9986 if (GET_MODE (XEXP (x
, 0)) == SImode
)
9987 *cost
+= extra_cost
->ldst
.load
;
9989 *cost
+= extra_cost
->ldst
.load_sign_extend
;
9992 *cost
+= extra_cost
->alu
.shift
;
9997 /* Widening from less than 32-bits requires an extend operation. */
9998 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10000 /* We have SXTB/SXTH. */
10001 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10003 *cost
+= extra_cost
->alu
.extend
;
10005 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10007 /* Needs two shifts. */
10008 *cost
+= COSTS_N_INSNS (1);
10009 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10011 *cost
+= 2 * extra_cost
->alu
.shift
;
10014 /* Widening beyond 32-bits requires one more insn. */
10015 if (mode
== DImode
)
10017 *cost
+= COSTS_N_INSNS (1);
10019 *cost
+= extra_cost
->alu
.shift
;
10026 || GET_MODE (XEXP (x
, 0)) == SImode
10027 || GET_MODE (XEXP (x
, 0)) == QImode
)
10028 && MEM_P (XEXP (x
, 0)))
10030 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10032 if (mode
== DImode
)
10033 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10038 /* Widening from less than 32-bits requires an extend operation. */
10039 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10041 /* UXTB can be a shorter instruction in Thumb2, but it might
10042 be slower than the AND Rd, Rn, #255 alternative. When
10043 optimizing for speed it should never be slower to use
10044 AND, and we don't really model 16-bit vs 32-bit insns
10047 *cost
+= extra_cost
->alu
.logical
;
10049 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10051 /* We have UXTB/UXTH. */
10052 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10054 *cost
+= extra_cost
->alu
.extend
;
10056 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10058 /* Needs two shifts. It's marginally preferable to use
10059 shifts rather than two BIC instructions as the second
10060 shift may merge with a subsequent insn as a shifter
10062 *cost
= COSTS_N_INSNS (2);
10063 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10065 *cost
+= 2 * extra_cost
->alu
.shift
;
10068 /* Widening beyond 32-bits requires one more insn. */
10069 if (mode
== DImode
)
10071 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10078 /* CONST_INT has no mode, so we cannot tell for sure how many
10079 insns are really going to be needed. The best we can do is
10080 look at the value passed. If it fits in SImode, then assume
10081 that's the mode it will be used for. Otherwise assume it
10082 will be used in DImode. */
10083 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10088 /* Avoid blowing up in arm_gen_constant (). */
10089 if (!(outer_code
== PLUS
10090 || outer_code
== AND
10091 || outer_code
== IOR
10092 || outer_code
== XOR
10093 || outer_code
== MINUS
))
10097 if (mode
== SImode
)
10099 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10100 INTVAL (x
), NULL
, NULL
,
10106 *cost
+= COSTS_N_INSNS (arm_gen_constant
10107 (outer_code
, SImode
, NULL
,
10108 trunc_int_for_mode (INTVAL (x
), SImode
),
10110 + arm_gen_constant (outer_code
, SImode
, NULL
,
10111 INTVAL (x
) >> 32, NULL
,
10123 if (arm_arch_thumb2
&& !flag_pic
)
10124 *cost
+= COSTS_N_INSNS (1);
10126 *cost
+= extra_cost
->ldst
.load
;
10129 *cost
+= COSTS_N_INSNS (1);
10133 *cost
+= COSTS_N_INSNS (1);
10135 *cost
+= extra_cost
->alu
.arith
;
10141 *cost
= COSTS_N_INSNS (4);
10146 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10147 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10149 if (vfp3_const_double_rtx (x
))
10152 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10158 if (mode
== DFmode
)
10159 *cost
+= extra_cost
->ldst
.loadd
;
10161 *cost
+= extra_cost
->ldst
.loadf
;
10164 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10168 *cost
= COSTS_N_INSNS (4);
10174 && TARGET_HARD_FLOAT
10175 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10176 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10177 *cost
= COSTS_N_INSNS (1);
10179 *cost
= COSTS_N_INSNS (4);
10184 /* When optimizing for size, we prefer constant pool entries to
10185 MOVW/MOVT pairs, so bump the cost of these slightly. */
10192 *cost
+= extra_cost
->alu
.clz
;
10196 if (XEXP (x
, 1) == const0_rtx
)
10199 *cost
+= extra_cost
->alu
.log_shift
;
10200 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10203 /* Fall through. */
10207 *cost
+= COSTS_N_INSNS (1);
10211 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10212 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10213 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10214 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10215 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10216 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10217 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10218 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10222 *cost
+= extra_cost
->mult
[1].extend
;
10223 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10224 ZERO_EXTEND
, 0, speed_p
)
10225 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10226 ZERO_EXTEND
, 0, speed_p
));
10229 *cost
= LIBCALL_COST (1);
10232 case UNSPEC_VOLATILE
:
10234 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10237 /* Reading the PC is like reading any other register. Writing it
10238 is more expensive, but we take that into account elsewhere. */
10243 /* TODO: Simple zero_extract of bottom bits using AND. */
10244 /* Fall through. */
10248 && CONST_INT_P (XEXP (x
, 1))
10249 && CONST_INT_P (XEXP (x
, 2)))
10252 *cost
+= extra_cost
->alu
.bfx
;
10253 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10256 /* Without UBFX/SBFX, need to resort to shift operations. */
10257 *cost
+= COSTS_N_INSNS (1);
10259 *cost
+= 2 * extra_cost
->alu
.shift
;
10260 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10264 if (TARGET_HARD_FLOAT
)
10267 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10268 if (!TARGET_FPU_ARMV8
10269 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10271 /* Pre v8, widening HF->DF is a two-step process, first
10272 widening to SFmode. */
10273 *cost
+= COSTS_N_INSNS (1);
10275 *cost
+= extra_cost
->fp
[0].widen
;
10277 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10281 *cost
= LIBCALL_COST (1);
10284 case FLOAT_TRUNCATE
:
10285 if (TARGET_HARD_FLOAT
)
10288 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10289 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10291 /* Vector modes? */
10293 *cost
= LIBCALL_COST (1);
10297 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10299 rtx op0
= XEXP (x
, 0);
10300 rtx op1
= XEXP (x
, 1);
10301 rtx op2
= XEXP (x
, 2);
10304 /* vfms or vfnma. */
10305 if (GET_CODE (op0
) == NEG
)
10306 op0
= XEXP (op0
, 0);
10308 /* vfnms or vfnma. */
10309 if (GET_CODE (op2
) == NEG
)
10310 op2
= XEXP (op2
, 0);
10312 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
10313 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
10314 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
10317 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10322 *cost
= LIBCALL_COST (3);
10327 if (TARGET_HARD_FLOAT
)
10329 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10330 a vcvt fixed-point conversion. */
10331 if (code
== FIX
&& mode
== SImode
10332 && GET_CODE (XEXP (x
, 0)) == FIX
10333 && GET_MODE (XEXP (x
, 0)) == SFmode
10334 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10335 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10339 *cost
+= extra_cost
->fp
[0].toint
;
10341 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10346 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10348 mode
= GET_MODE (XEXP (x
, 0));
10350 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
10351 /* Strip of the 'cost' of rounding towards zero. */
10352 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10353 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
10356 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10357 /* ??? Increase the cost to deal with transferring from
10358 FP -> CORE registers? */
10361 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10362 && TARGET_FPU_ARMV8
)
10365 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10368 /* Vector costs? */
10370 *cost
= LIBCALL_COST (1);
10374 case UNSIGNED_FLOAT
:
10375 if (TARGET_HARD_FLOAT
)
10377 /* ??? Increase the cost to deal with transferring from CORE
10378 -> FP registers? */
10380 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10383 *cost
= LIBCALL_COST (1);
10391 /* Just a guess. Guess number of instructions in the asm
10392 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10393 though (see PR60663). */
10394 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10395 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10397 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10401 if (mode
!= VOIDmode
)
10402 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10404 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10409 #undef HANDLE_NARROW_SHIFT_ARITH
10411 /* RTX costs entry point. */
10414 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
10415 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
10418 int code
= GET_CODE (x
);
10419 gcc_assert (current_tune
->insn_extra_cost
);
10421 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
10422 (enum rtx_code
) outer_code
,
10423 current_tune
->insn_extra_cost
,
10426 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10428 print_rtl_single (dump_file
, x
);
10429 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10430 *total
, result
? "final" : "partial");
10435 /* All address computations that can be done are free, but rtx cost returns
10436 the same for practically all of them. So we weight the different types
10437 of address here in the order (most pref first):
10438 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10440 arm_arm_address_cost (rtx x
)
10442 enum rtx_code c
= GET_CODE (x
);
10444 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
10446 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
10451 if (CONST_INT_P (XEXP (x
, 1)))
10454 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
10464 arm_thumb_address_cost (rtx x
)
10466 enum rtx_code c
= GET_CODE (x
);
10471 && REG_P (XEXP (x
, 0))
10472 && CONST_INT_P (XEXP (x
, 1)))
10479 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
10480 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
10482 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
10485 /* Adjust cost hook for XScale. */
10487 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10490 /* Some true dependencies can have a higher cost depending
10491 on precisely how certain input operands are used. */
10493 && recog_memoized (insn
) >= 0
10494 && recog_memoized (dep
) >= 0)
10496 int shift_opnum
= get_attr_shift (insn
);
10497 enum attr_type attr_type
= get_attr_type (dep
);
10499 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10500 operand for INSN. If we have a shifted input operand and the
10501 instruction we depend on is another ALU instruction, then we may
10502 have to account for an additional stall. */
10503 if (shift_opnum
!= 0
10504 && (attr_type
== TYPE_ALU_SHIFT_IMM
10505 || attr_type
== TYPE_ALUS_SHIFT_IMM
10506 || attr_type
== TYPE_LOGIC_SHIFT_IMM
10507 || attr_type
== TYPE_LOGICS_SHIFT_IMM
10508 || attr_type
== TYPE_ALU_SHIFT_REG
10509 || attr_type
== TYPE_ALUS_SHIFT_REG
10510 || attr_type
== TYPE_LOGIC_SHIFT_REG
10511 || attr_type
== TYPE_LOGICS_SHIFT_REG
10512 || attr_type
== TYPE_MOV_SHIFT
10513 || attr_type
== TYPE_MVN_SHIFT
10514 || attr_type
== TYPE_MOV_SHIFT_REG
10515 || attr_type
== TYPE_MVN_SHIFT_REG
))
10517 rtx shifted_operand
;
10520 /* Get the shifted operand. */
10521 extract_insn (insn
);
10522 shifted_operand
= recog_data
.operand
[shift_opnum
];
10524 /* Iterate over all the operands in DEP. If we write an operand
10525 that overlaps with SHIFTED_OPERAND, then we have increase the
10526 cost of this dependency. */
10527 extract_insn (dep
);
10528 preprocess_constraints (dep
);
10529 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
10531 /* We can ignore strict inputs. */
10532 if (recog_data
.operand_type
[opno
] == OP_IN
)
10535 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
10547 /* Adjust cost hook for Cortex A9. */
10549 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10559 case REG_DEP_OUTPUT
:
10560 if (recog_memoized (insn
) >= 0
10561 && recog_memoized (dep
) >= 0)
10563 if (GET_CODE (PATTERN (insn
)) == SET
)
10566 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
10568 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
10570 enum attr_type attr_type_insn
= get_attr_type (insn
);
10571 enum attr_type attr_type_dep
= get_attr_type (dep
);
10573 /* By default all dependencies of the form
10576 have an extra latency of 1 cycle because
10577 of the input and output dependency in this
10578 case. However this gets modeled as an true
10579 dependency and hence all these checks. */
10580 if (REG_P (SET_DEST (PATTERN (insn
)))
10581 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
10583 /* FMACS is a special case where the dependent
10584 instruction can be issued 3 cycles before
10585 the normal latency in case of an output
10587 if ((attr_type_insn
== TYPE_FMACS
10588 || attr_type_insn
== TYPE_FMACD
)
10589 && (attr_type_dep
== TYPE_FMACS
10590 || attr_type_dep
== TYPE_FMACD
))
10592 if (dep_type
== REG_DEP_OUTPUT
)
10593 *cost
= insn_default_latency (dep
) - 3;
10595 *cost
= insn_default_latency (dep
);
10600 if (dep_type
== REG_DEP_OUTPUT
)
10601 *cost
= insn_default_latency (dep
) + 1;
10603 *cost
= insn_default_latency (dep
);
10613 gcc_unreachable ();
10619 /* Adjust cost hook for FA726TE. */
10621 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10624 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
10625 have penalty of 3. */
10626 if (dep_type
== REG_DEP_TRUE
10627 && recog_memoized (insn
) >= 0
10628 && recog_memoized (dep
) >= 0
10629 && get_attr_conds (dep
) == CONDS_SET
)
10631 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
10632 if (get_attr_conds (insn
) == CONDS_USE
10633 && get_attr_type (insn
) != TYPE_BRANCH
)
10639 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
10640 || get_attr_conds (insn
) == CONDS_USE
)
10650 /* Implement TARGET_REGISTER_MOVE_COST.
10652 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
10653 it is typically more expensive than a single memory access. We set
10654 the cost to less than two memory accesses so that floating
10655 point to integer conversion does not go through memory. */
10658 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
10659 reg_class_t from
, reg_class_t to
)
10663 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
10664 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
10666 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
10667 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
10669 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
10676 if (from
== HI_REGS
|| to
== HI_REGS
)
10683 /* Implement TARGET_MEMORY_MOVE_COST. */
10686 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
10687 bool in ATTRIBUTE_UNUSED
)
10693 if (GET_MODE_SIZE (mode
) < 4)
10696 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
10700 /* Vectorizer cost model implementation. */
10702 /* Implement targetm.vectorize.builtin_vectorization_cost. */
10704 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
10706 int misalign ATTRIBUTE_UNUSED
)
10710 switch (type_of_cost
)
10713 return current_tune
->vec_costs
->scalar_stmt_cost
;
10716 return current_tune
->vec_costs
->scalar_load_cost
;
10719 return current_tune
->vec_costs
->scalar_store_cost
;
10722 return current_tune
->vec_costs
->vec_stmt_cost
;
10725 return current_tune
->vec_costs
->vec_align_load_cost
;
10728 return current_tune
->vec_costs
->vec_store_cost
;
10730 case vec_to_scalar
:
10731 return current_tune
->vec_costs
->vec_to_scalar_cost
;
10733 case scalar_to_vec
:
10734 return current_tune
->vec_costs
->scalar_to_vec_cost
;
10736 case unaligned_load
:
10737 return current_tune
->vec_costs
->vec_unalign_load_cost
;
10739 case unaligned_store
:
10740 return current_tune
->vec_costs
->vec_unalign_store_cost
;
10742 case cond_branch_taken
:
10743 return current_tune
->vec_costs
->cond_taken_branch_cost
;
10745 case cond_branch_not_taken
:
10746 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
10749 case vec_promote_demote
:
10750 return current_tune
->vec_costs
->vec_stmt_cost
;
10752 case vec_construct
:
10753 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
10754 return elements
/ 2 + 1;
10757 gcc_unreachable ();
10761 /* Implement targetm.vectorize.add_stmt_cost. */
10764 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
10765 struct _stmt_vec_info
*stmt_info
, int misalign
,
10766 enum vect_cost_model_location where
)
10768 unsigned *cost
= (unsigned *) data
;
10769 unsigned retval
= 0;
10771 if (flag_vect_cost_model
)
10773 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
10774 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
10776 /* Statements in an inner loop relative to the loop being
10777 vectorized are weighted more heavily. The value here is
10778 arbitrary and could potentially be improved with analysis. */
10779 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
10780 count
*= 50; /* FIXME. */
10782 retval
= (unsigned) (count
* stmt_cost
);
10783 cost
[where
] += retval
;
10789 /* Return true if and only if this insn can dual-issue only as older. */
10791 cortexa7_older_only (rtx_insn
*insn
)
10793 if (recog_memoized (insn
) < 0)
10796 switch (get_attr_type (insn
))
10798 case TYPE_ALU_DSP_REG
:
10799 case TYPE_ALU_SREG
:
10800 case TYPE_ALUS_SREG
:
10801 case TYPE_LOGIC_REG
:
10802 case TYPE_LOGICS_REG
:
10804 case TYPE_ADCS_REG
:
10809 case TYPE_SHIFT_IMM
:
10810 case TYPE_SHIFT_REG
:
10811 case TYPE_LOAD_BYTE
:
10814 case TYPE_FFARITHS
:
10816 case TYPE_FFARITHD
:
10834 case TYPE_F_STORES
:
10841 /* Return true if and only if this insn can dual-issue as younger. */
10843 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
10845 if (recog_memoized (insn
) < 0)
10848 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
10852 switch (get_attr_type (insn
))
10855 case TYPE_ALUS_IMM
:
10856 case TYPE_LOGIC_IMM
:
10857 case TYPE_LOGICS_IMM
:
10862 case TYPE_MOV_SHIFT
:
10863 case TYPE_MOV_SHIFT_REG
:
10873 /* Look for an instruction that can dual issue only as an older
10874 instruction, and move it in front of any instructions that can
10875 dual-issue as younger, while preserving the relative order of all
10876 other instructions in the ready list. This is a hueuristic to help
10877 dual-issue in later cycles, by postponing issue of more flexible
10878 instructions. This heuristic may affect dual issue opportunities
10879 in the current cycle. */
10881 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
10882 int *n_readyp
, int clock
)
10885 int first_older_only
= -1, first_younger
= -1;
10889 ";; sched_reorder for cycle %d with %d insns in ready list\n",
10893 /* Traverse the ready list from the head (the instruction to issue
10894 first), and looking for the first instruction that can issue as
10895 younger and the first instruction that can dual-issue only as
10897 for (i
= *n_readyp
- 1; i
>= 0; i
--)
10899 rtx_insn
*insn
= ready
[i
];
10900 if (cortexa7_older_only (insn
))
10902 first_older_only
= i
;
10904 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
10907 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
10911 /* Nothing to reorder because either no younger insn found or insn
10912 that can dual-issue only as older appears before any insn that
10913 can dual-issue as younger. */
10914 if (first_younger
== -1)
10917 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
10921 /* Nothing to reorder because no older-only insn in the ready list. */
10922 if (first_older_only
== -1)
10925 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
10929 /* Move first_older_only insn before first_younger. */
10931 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
10932 INSN_UID(ready
[first_older_only
]),
10933 INSN_UID(ready
[first_younger
]));
10934 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
10935 for (i
= first_older_only
; i
< first_younger
; i
++)
10937 ready
[i
] = ready
[i
+1];
10940 ready
[i
] = first_older_only_insn
;
10944 /* Implement TARGET_SCHED_REORDER. */
10946 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
10952 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
10955 /* Do nothing for other cores. */
10959 return arm_issue_rate ();
10962 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
10963 It corrects the value of COST based on the relationship between
10964 INSN and DEP through the dependence LINK. It returns the new
10965 value. There is a per-core adjust_cost hook to adjust scheduler costs
10966 and the per-core hook can choose to completely override the generic
10967 adjust_cost function. Only put bits of code into arm_adjust_cost that
10968 are common across all cores. */
10970 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
10975 /* When generating Thumb-1 code, we want to place flag-setting operations
10976 close to a conditional branch which depends on them, so that we can
10977 omit the comparison. */
10980 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
10981 && recog_memoized (dep
) >= 0
10982 && get_attr_conds (dep
) == CONDS_SET
)
10985 if (current_tune
->sched_adjust_cost
!= NULL
)
10987 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
10991 /* XXX Is this strictly true? */
10992 if (dep_type
== REG_DEP_ANTI
10993 || dep_type
== REG_DEP_OUTPUT
)
10996 /* Call insns don't incur a stall, even if they follow a load. */
11001 if ((i_pat
= single_set (insn
)) != NULL
11002 && MEM_P (SET_SRC (i_pat
))
11003 && (d_pat
= single_set (dep
)) != NULL
11004 && MEM_P (SET_DEST (d_pat
)))
11006 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11007 /* This is a load after a store, there is no conflict if the load reads
11008 from a cached area. Assume that loads from the stack, and from the
11009 constant pool are cached, and that others will miss. This is a
11012 if ((GET_CODE (src_mem
) == SYMBOL_REF
11013 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11014 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11015 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11016 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11024 arm_max_conditional_execute (void)
11026 return max_insns_skipped
;
11030 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11033 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11035 return (optimize
> 0) ? 2 : 0;
11039 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11041 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11044 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11045 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11046 sequences of non-executed instructions in IT blocks probably take the same
11047 amount of time as executed instructions (and the IT instruction itself takes
11048 space in icache). This function was experimentally determined to give good
11049 results on a popular embedded benchmark. */
11052 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11054 return (TARGET_32BIT
&& speed_p
) ? 1
11055 : arm_default_branch_cost (speed_p
, predictable_p
);
11059 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
11061 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11064 static bool fp_consts_inited
= false;
11066 static REAL_VALUE_TYPE value_fp0
;
11069 init_fp_table (void)
11073 r
= REAL_VALUE_ATOF ("0", DFmode
);
11075 fp_consts_inited
= true;
11078 /* Return TRUE if rtx X is a valid immediate FP constant. */
11080 arm_const_double_rtx (rtx x
)
11082 const REAL_VALUE_TYPE
*r
;
11084 if (!fp_consts_inited
)
11087 r
= CONST_DOUBLE_REAL_VALUE (x
);
11088 if (REAL_VALUE_MINUS_ZERO (*r
))
11091 if (real_equal (r
, &value_fp0
))
11097 /* VFPv3 has a fairly wide range of representable immediates, formed from
11098 "quarter-precision" floating-point values. These can be evaluated using this
11099 formula (with ^ for exponentiation):
11103 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11104 16 <= n <= 31 and 0 <= r <= 7.
11106 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11108 - A (most-significant) is the sign bit.
11109 - BCD are the exponent (encoded as r XOR 3).
11110 - EFGH are the mantissa (encoded as n - 16).
11113 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11114 fconst[sd] instruction, or -1 if X isn't suitable. */
11116 vfp3_const_double_index (rtx x
)
11118 REAL_VALUE_TYPE r
, m
;
11119 int sign
, exponent
;
11120 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11121 unsigned HOST_WIDE_INT mask
;
11122 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11125 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11128 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11130 /* We can't represent these things, so detect them first. */
11131 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11134 /* Extract sign, exponent and mantissa. */
11135 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11136 r
= real_value_abs (&r
);
11137 exponent
= REAL_EXP (&r
);
11138 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11139 highest (sign) bit, with a fixed binary point at bit point_pos.
11140 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11141 bits for the mantissa, this may fail (low bits would be lost). */
11142 real_ldexp (&m
, &r
, point_pos
- exponent
);
11143 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11144 mantissa
= w
.elt (0);
11145 mant_hi
= w
.elt (1);
11147 /* If there are bits set in the low part of the mantissa, we can't
11148 represent this value. */
11152 /* Now make it so that mantissa contains the most-significant bits, and move
11153 the point_pos to indicate that the least-significant bits have been
11155 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11156 mantissa
= mant_hi
;
11158 /* We can permit four significant bits of mantissa only, plus a high bit
11159 which is always 1. */
11160 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11161 if ((mantissa
& mask
) != 0)
11164 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11165 mantissa
>>= point_pos
- 5;
11167 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11168 floating-point immediate zero with Neon using an integer-zero load, but
11169 that case is handled elsewhere.) */
11173 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11175 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11176 normalized significands are in the range [1, 2). (Our mantissa is shifted
11177 left 4 places at this point relative to normalized IEEE754 values). GCC
11178 internally uses [0.5, 1) (see real.c), so the exponent returned from
11179 REAL_EXP must be altered. */
11180 exponent
= 5 - exponent
;
11182 if (exponent
< 0 || exponent
> 7)
11185 /* Sign, mantissa and exponent are now in the correct form to plug into the
11186 formula described in the comment above. */
11187 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11190 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11192 vfp3_const_double_rtx (rtx x
)
11197 return vfp3_const_double_index (x
) != -1;
11200 /* Recognize immediates which can be used in various Neon instructions. Legal
11201 immediates are described by the following table (for VMVN variants, the
11202 bitwise inverse of the constant shown is recognized. In either case, VMOV
11203 is output and the correct instruction to use for a given constant is chosen
11204 by the assembler). The constant shown is replicated across all elements of
11205 the destination vector.
11207 insn elems variant constant (binary)
11208 ---- ----- ------- -----------------
11209 vmov i32 0 00000000 00000000 00000000 abcdefgh
11210 vmov i32 1 00000000 00000000 abcdefgh 00000000
11211 vmov i32 2 00000000 abcdefgh 00000000 00000000
11212 vmov i32 3 abcdefgh 00000000 00000000 00000000
11213 vmov i16 4 00000000 abcdefgh
11214 vmov i16 5 abcdefgh 00000000
11215 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11216 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11217 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11218 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11219 vmvn i16 10 00000000 abcdefgh
11220 vmvn i16 11 abcdefgh 00000000
11221 vmov i32 12 00000000 00000000 abcdefgh 11111111
11222 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11223 vmov i32 14 00000000 abcdefgh 11111111 11111111
11224 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11225 vmov i8 16 abcdefgh
11226 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11227 eeeeeeee ffffffff gggggggg hhhhhhhh
11228 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11229 vmov f32 19 00000000 00000000 00000000 00000000
11231 For case 18, B = !b. Representable values are exactly those accepted by
11232 vfp3_const_double_index, but are output as floating-point numbers rather
11235 For case 19, we will change it to vmov.i32 when assembling.
11237 Variants 0-5 (inclusive) may also be used as immediates for the second
11238 operand of VORR/VBIC instructions.
11240 The INVERSE argument causes the bitwise inverse of the given operand to be
11241 recognized instead (used for recognizing legal immediates for the VAND/VORN
11242 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11243 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11244 output, rather than the real insns vbic/vorr).
11246 INVERSE makes no difference to the recognition of float vectors.
11248 The return value is the variant of immediate as shown in the above table, or
11249 -1 if the given value doesn't match any of the listed patterns.
11252 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11253 rtx
*modconst
, int *elementwidth
)
11255 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11257 for (i = 0; i < idx; i += (STRIDE)) \
11262 immtype = (CLASS); \
11263 elsize = (ELSIZE); \
11267 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11268 unsigned int innersize
;
11269 unsigned char bytes
[16];
11270 int immtype
= -1, matches
;
11271 unsigned int invmask
= inverse
? 0xff : 0;
11272 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11275 n_elts
= CONST_VECTOR_NUNITS (op
);
11279 if (mode
== VOIDmode
)
11283 innersize
= GET_MODE_UNIT_SIZE (mode
);
11285 /* Vectors of float constants. */
11286 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11288 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11290 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11293 /* FP16 vectors cannot be represented. */
11294 if (GET_MODE_INNER (mode
) == HFmode
)
11297 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11298 are distinct in this context. */
11299 if (!const_vec_duplicate_p (op
))
11303 *modconst
= CONST_VECTOR_ELT (op
, 0);
11308 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11314 /* Splat vector constant out into a byte vector. */
11315 for (i
= 0; i
< n_elts
; i
++)
11317 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11318 unsigned HOST_WIDE_INT elpart
;
11320 gcc_assert (CONST_INT_P (el
));
11321 elpart
= INTVAL (el
);
11323 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11325 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11326 elpart
>>= BITS_PER_UNIT
;
11330 /* Sanity check. */
11331 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11335 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11336 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11338 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11339 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11341 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11342 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11344 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11345 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11347 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11349 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11351 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11352 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11354 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11355 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11357 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11358 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11360 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11361 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11363 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11365 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11367 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11368 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11370 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11371 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11373 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11374 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11376 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11377 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11379 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11381 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11382 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11390 *elementwidth
= elsize
;
11394 unsigned HOST_WIDE_INT imm
= 0;
11396 /* Un-invert bytes of recognized vector, if necessary. */
11398 for (i
= 0; i
< idx
; i
++)
11399 bytes
[i
] ^= invmask
;
11403 /* FIXME: Broken on 32-bit H_W_I hosts. */
11404 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11406 for (i
= 0; i
< 8; i
++)
11407 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11408 << (i
* BITS_PER_UNIT
);
11410 *modconst
= GEN_INT (imm
);
11414 unsigned HOST_WIDE_INT imm
= 0;
11416 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11417 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11419 *modconst
= GEN_INT (imm
);
11427 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11428 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11429 float elements), and a modified constant (whatever should be output for a
11430 VMOV) in *MODCONST. */
11433 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
11434 rtx
*modconst
, int *elementwidth
)
11438 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
11444 *modconst
= tmpconst
;
11447 *elementwidth
= tmpwidth
;
11452 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11453 the immediate is valid, write a constant suitable for using as an operand
11454 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11455 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11458 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
11459 rtx
*modconst
, int *elementwidth
)
11463 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
11465 if (retval
< 0 || retval
> 5)
11469 *modconst
= tmpconst
;
11472 *elementwidth
= tmpwidth
;
11477 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11478 the immediate is valid, write a constant suitable for using as an operand
11479 to VSHR/VSHL to *MODCONST and the corresponding element width to
11480 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
11481 because they have different limitations. */
11484 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
11485 rtx
*modconst
, int *elementwidth
,
11488 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11489 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
11490 unsigned HOST_WIDE_INT last_elt
= 0;
11491 unsigned HOST_WIDE_INT maxshift
;
11493 /* Split vector constant out into a byte vector. */
11494 for (i
= 0; i
< n_elts
; i
++)
11496 rtx el
= CONST_VECTOR_ELT (op
, i
);
11497 unsigned HOST_WIDE_INT elpart
;
11499 if (CONST_INT_P (el
))
11500 elpart
= INTVAL (el
);
11501 else if (CONST_DOUBLE_P (el
))
11504 gcc_unreachable ();
11506 if (i
!= 0 && elpart
!= last_elt
)
11512 /* Shift less than element size. */
11513 maxshift
= innersize
* 8;
11517 /* Left shift immediate value can be from 0 to <size>-1. */
11518 if (last_elt
>= maxshift
)
11523 /* Right shift immediate value can be from 1 to <size>. */
11524 if (last_elt
== 0 || last_elt
> maxshift
)
11529 *elementwidth
= innersize
* 8;
11532 *modconst
= CONST_VECTOR_ELT (op
, 0);
11537 /* Return a string suitable for output of Neon immediate logic operation
11541 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
11542 int inverse
, int quad
)
11544 int width
, is_valid
;
11545 static char templ
[40];
11547 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
11549 gcc_assert (is_valid
!= 0);
11552 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
11554 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
11559 /* Return a string suitable for output of Neon immediate shift operation
11560 (VSHR or VSHL) MNEM. */
11563 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
11564 machine_mode mode
, int quad
,
11567 int width
, is_valid
;
11568 static char templ
[40];
11570 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
11571 gcc_assert (is_valid
!= 0);
11574 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
11576 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
11581 /* Output a sequence of pairwise operations to implement a reduction.
11582 NOTE: We do "too much work" here, because pairwise operations work on two
11583 registers-worth of operands in one go. Unfortunately we can't exploit those
11584 extra calculations to do the full operation in fewer steps, I don't think.
11585 Although all vector elements of the result but the first are ignored, we
11586 actually calculate the same result in each of the elements. An alternative
11587 such as initially loading a vector with zero to use as each of the second
11588 operands would use up an additional register and take an extra instruction,
11589 for no particular gain. */
11592 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
11593 rtx (*reduc
) (rtx
, rtx
, rtx
))
11595 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
11598 for (i
= parts
/ 2; i
>= 1; i
/= 2)
11600 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
11601 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
11606 /* If VALS is a vector constant that can be loaded into a register
11607 using VDUP, generate instructions to do so and return an RTX to
11608 assign to the register. Otherwise return NULL_RTX. */
11611 neon_vdup_constant (rtx vals
)
11613 machine_mode mode
= GET_MODE (vals
);
11614 machine_mode inner_mode
= GET_MODE_INNER (mode
);
11617 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
11620 if (!const_vec_duplicate_p (vals
, &x
))
11621 /* The elements are not all the same. We could handle repeating
11622 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
11623 {0, C, 0, C, 0, C, 0, C} which can be loaded using
11627 /* We can load this constant by using VDUP and a constant in a
11628 single ARM register. This will be cheaper than a vector
11631 x
= copy_to_mode_reg (inner_mode
, x
);
11632 return gen_rtx_VEC_DUPLICATE (mode
, x
);
11635 /* Generate code to load VALS, which is a PARALLEL containing only
11636 constants (for vec_init) or CONST_VECTOR, efficiently into a
11637 register. Returns an RTX to copy into the register, or NULL_RTX
11638 for a PARALLEL that can not be converted into a CONST_VECTOR. */
11641 neon_make_constant (rtx vals
)
11643 machine_mode mode
= GET_MODE (vals
);
11645 rtx const_vec
= NULL_RTX
;
11646 int n_elts
= GET_MODE_NUNITS (mode
);
11650 if (GET_CODE (vals
) == CONST_VECTOR
)
11652 else if (GET_CODE (vals
) == PARALLEL
)
11654 /* A CONST_VECTOR must contain only CONST_INTs and
11655 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11656 Only store valid constants in a CONST_VECTOR. */
11657 for (i
= 0; i
< n_elts
; ++i
)
11659 rtx x
= XVECEXP (vals
, 0, i
);
11660 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
11663 if (n_const
== n_elts
)
11664 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
11667 gcc_unreachable ();
11669 if (const_vec
!= NULL
11670 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
11671 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
11673 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
11674 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
11675 pipeline cycle; creating the constant takes one or two ARM
11676 pipeline cycles. */
11678 else if (const_vec
!= NULL_RTX
)
11679 /* Load from constant pool. On Cortex-A8 this takes two cycles
11680 (for either double or quad vectors). We can not take advantage
11681 of single-cycle VLD1 because we need a PC-relative addressing
11685 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11686 We can not construct an initializer. */
11690 /* Initialize vector TARGET to VALS. */
11693 neon_expand_vector_init (rtx target
, rtx vals
)
11695 machine_mode mode
= GET_MODE (target
);
11696 machine_mode inner_mode
= GET_MODE_INNER (mode
);
11697 int n_elts
= GET_MODE_NUNITS (mode
);
11698 int n_var
= 0, one_var
= -1;
11699 bool all_same
= true;
11703 for (i
= 0; i
< n_elts
; ++i
)
11705 x
= XVECEXP (vals
, 0, i
);
11706 if (!CONSTANT_P (x
))
11707 ++n_var
, one_var
= i
;
11709 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
11715 rtx constant
= neon_make_constant (vals
);
11716 if (constant
!= NULL_RTX
)
11718 emit_move_insn (target
, constant
);
11723 /* Splat a single non-constant element if we can. */
11724 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
11726 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
11727 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
11731 /* One field is non-constant. Load constant then overwrite varying
11732 field. This is more efficient than using the stack. */
11735 rtx copy
= copy_rtx (vals
);
11736 rtx index
= GEN_INT (one_var
);
11738 /* Load constant part of vector, substitute neighboring value for
11739 varying element. */
11740 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
11741 neon_expand_vector_init (target
, copy
);
11743 /* Insert variable. */
11744 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
11748 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
11751 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
11754 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
11757 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
11760 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
11763 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
11766 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
11769 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
11772 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
11775 gcc_unreachable ();
11780 /* Construct the vector in memory one field at a time
11781 and load the whole vector. */
11782 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
11783 for (i
= 0; i
< n_elts
; i
++)
11784 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
11785 i
* GET_MODE_SIZE (inner_mode
)),
11786 XVECEXP (vals
, 0, i
));
11787 emit_move_insn (target
, mem
);
11790 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
11791 ERR if it doesn't. EXP indicates the source location, which includes the
11792 inlining history for intrinsics. */
11795 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
11796 const_tree exp
, const char *desc
)
11798 HOST_WIDE_INT lane
;
11800 gcc_assert (CONST_INT_P (operand
));
11802 lane
= INTVAL (operand
);
11804 if (lane
< low
|| lane
>= high
)
11807 error ("%K%s %wd out of range %wd - %wd",
11808 exp
, desc
, lane
, low
, high
- 1);
11810 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
11814 /* Bounds-check lanes. */
11817 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
11820 bounds_check (operand
, low
, high
, exp
, "lane");
11823 /* Bounds-check constants. */
11826 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
11828 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
11832 neon_element_bits (machine_mode mode
)
11834 return GET_MODE_UNIT_BITSIZE (mode
);
11838 /* Predicates for `match_operand' and `match_operator'. */
11840 /* Return TRUE if OP is a valid coprocessor memory address pattern.
11841 WB is true if full writeback address modes are allowed and is false
11842 if limited writeback address modes (POST_INC and PRE_DEC) are
11846 arm_coproc_mem_operand (rtx op
, bool wb
)
11850 /* Reject eliminable registers. */
11851 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
11852 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
11853 || reg_mentioned_p (arg_pointer_rtx
, op
)
11854 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
11855 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
11856 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
11857 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
11860 /* Constants are converted into offsets from labels. */
11864 ind
= XEXP (op
, 0);
11866 if (reload_completed
11867 && (GET_CODE (ind
) == LABEL_REF
11868 || (GET_CODE (ind
) == CONST
11869 && GET_CODE (XEXP (ind
, 0)) == PLUS
11870 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
11871 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
11874 /* Match: (mem (reg)). */
11876 return arm_address_register_rtx_p (ind
, 0);
11878 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
11879 acceptable in any case (subject to verification by
11880 arm_address_register_rtx_p). We need WB to be true to accept
11881 PRE_INC and POST_DEC. */
11882 if (GET_CODE (ind
) == POST_INC
11883 || GET_CODE (ind
) == PRE_DEC
11885 && (GET_CODE (ind
) == PRE_INC
11886 || GET_CODE (ind
) == POST_DEC
)))
11887 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
11890 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
11891 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
11892 && GET_CODE (XEXP (ind
, 1)) == PLUS
11893 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
11894 ind
= XEXP (ind
, 1);
11899 if (GET_CODE (ind
) == PLUS
11900 && REG_P (XEXP (ind
, 0))
11901 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
11902 && CONST_INT_P (XEXP (ind
, 1))
11903 && INTVAL (XEXP (ind
, 1)) > -1024
11904 && INTVAL (XEXP (ind
, 1)) < 1024
11905 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
11911 /* Return TRUE if OP is a memory operand which we can load or store a vector
11912 to/from. TYPE is one of the following values:
11913 0 - Vector load/stor (vldr)
11914 1 - Core registers (ldm)
11915 2 - Element/structure loads (vld1)
11918 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
11922 /* Reject eliminable registers. */
11923 if (strict
&& ! (reload_in_progress
|| reload_completed
)
11924 && (reg_mentioned_p (frame_pointer_rtx
, op
)
11925 || reg_mentioned_p (arg_pointer_rtx
, op
)
11926 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
11927 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
11928 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
11929 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
11932 /* Constants are converted into offsets from labels. */
11936 ind
= XEXP (op
, 0);
11938 if (reload_completed
11939 && (GET_CODE (ind
) == LABEL_REF
11940 || (GET_CODE (ind
) == CONST
11941 && GET_CODE (XEXP (ind
, 0)) == PLUS
11942 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
11943 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
11946 /* Match: (mem (reg)). */
11948 return arm_address_register_rtx_p (ind
, 0);
11950 /* Allow post-increment with Neon registers. */
11951 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
11952 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
11953 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
11955 /* Allow post-increment by register for VLDn */
11956 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
11957 && GET_CODE (XEXP (ind
, 1)) == PLUS
11958 && REG_P (XEXP (XEXP (ind
, 1), 1)))
11965 && GET_CODE (ind
) == PLUS
11966 && REG_P (XEXP (ind
, 0))
11967 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
11968 && CONST_INT_P (XEXP (ind
, 1))
11969 && INTVAL (XEXP (ind
, 1)) > -1024
11970 /* For quad modes, we restrict the constant offset to be slightly less
11971 than what the instruction format permits. We have no such constraint
11972 on double mode offsets. (This must match arm_legitimate_index_p.) */
11973 && (INTVAL (XEXP (ind
, 1))
11974 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
11975 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
11981 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
11984 neon_struct_mem_operand (rtx op
)
11988 /* Reject eliminable registers. */
11989 if (! (reload_in_progress
|| reload_completed
)
11990 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
11991 || reg_mentioned_p (arg_pointer_rtx
, op
)
11992 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
11993 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
11994 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
11995 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
11998 /* Constants are converted into offsets from labels. */
12002 ind
= XEXP (op
, 0);
12004 if (reload_completed
12005 && (GET_CODE (ind
) == LABEL_REF
12006 || (GET_CODE (ind
) == CONST
12007 && GET_CODE (XEXP (ind
, 0)) == PLUS
12008 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12009 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12012 /* Match: (mem (reg)). */
12014 return arm_address_register_rtx_p (ind
, 0);
12016 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12017 if (GET_CODE (ind
) == POST_INC
12018 || GET_CODE (ind
) == PRE_DEC
)
12019 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12024 /* Return true if X is a register that will be eliminated later on. */
12026 arm_eliminable_register (rtx x
)
12028 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12029 || REGNO (x
) == ARG_POINTER_REGNUM
12030 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12031 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12034 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12035 coprocessor registers. Otherwise return NO_REGS. */
12038 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12040 if (mode
== HFmode
)
12042 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12043 return GENERAL_REGS
;
12044 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12046 return GENERAL_REGS
;
12049 /* The neon move patterns handle all legitimate vector and struct
12052 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12053 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12054 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12055 || VALID_NEON_STRUCT_MODE (mode
)))
12058 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12061 return GENERAL_REGS
;
12064 /* Values which must be returned in the most-significant end of the return
12068 arm_return_in_msb (const_tree valtype
)
12070 return (TARGET_AAPCS_BASED
12071 && BYTES_BIG_ENDIAN
12072 && (AGGREGATE_TYPE_P (valtype
)
12073 || TREE_CODE (valtype
) == COMPLEX_TYPE
12074 || FIXED_POINT_TYPE_P (valtype
)));
12077 /* Return TRUE if X references a SYMBOL_REF. */
12079 symbol_mentioned_p (rtx x
)
12084 if (GET_CODE (x
) == SYMBOL_REF
)
12087 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12088 are constant offsets, not symbols. */
12089 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12092 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12094 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12100 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12101 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12104 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12111 /* Return TRUE if X references a LABEL_REF. */
12113 label_mentioned_p (rtx x
)
12118 if (GET_CODE (x
) == LABEL_REF
)
12121 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12122 instruction, but they are constant offsets, not symbols. */
12123 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12126 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12127 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12133 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12134 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12137 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12145 tls_mentioned_p (rtx x
)
12147 switch (GET_CODE (x
))
12150 return tls_mentioned_p (XEXP (x
, 0));
12153 if (XINT (x
, 1) == UNSPEC_TLS
)
12156 /* Fall through. */
12162 /* Must not copy any rtx that uses a pc-relative address.
12163 Also, disallow copying of load-exclusive instructions that
12164 may appear after splitting of compare-and-swap-style operations
12165 so as to prevent those loops from being transformed away from their
12166 canonical forms (see PR 69904). */
12169 arm_cannot_copy_insn_p (rtx_insn
*insn
)
12171 /* The tls call insn cannot be copied, as it is paired with a data
12173 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12176 subrtx_iterator::array_type array
;
12177 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
12179 const_rtx x
= *iter
;
12180 if (GET_CODE (x
) == UNSPEC
12181 && (XINT (x
, 1) == UNSPEC_PIC_BASE
12182 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
12186 rtx set
= single_set (insn
);
12189 rtx src
= SET_SRC (set
);
12190 if (GET_CODE (src
) == ZERO_EXTEND
)
12191 src
= XEXP (src
, 0);
12193 /* Catch the load-exclusive and load-acquire operations. */
12194 if (GET_CODE (src
) == UNSPEC_VOLATILE
12195 && (XINT (src
, 1) == VUNSPEC_LL
12196 || XINT (src
, 1) == VUNSPEC_LAX
))
12203 minmax_code (rtx x
)
12205 enum rtx_code code
= GET_CODE (x
);
12218 gcc_unreachable ();
12222 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12225 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12226 int *mask
, bool *signed_sat
)
12228 /* The high bound must be a power of two minus one. */
12229 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12233 /* The low bound is either zero (for usat) or one less than the
12234 negation of the high bound (for ssat). */
12235 if (INTVAL (lo_bound
) == 0)
12240 *signed_sat
= false;
12245 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12250 *signed_sat
= true;
12258 /* Return 1 if memory locations are adjacent. */
12260 adjacent_mem_locations (rtx a
, rtx b
)
12262 /* We don't guarantee to preserve the order of these memory refs. */
12263 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12266 if ((REG_P (XEXP (a
, 0))
12267 || (GET_CODE (XEXP (a
, 0)) == PLUS
12268 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12269 && (REG_P (XEXP (b
, 0))
12270 || (GET_CODE (XEXP (b
, 0)) == PLUS
12271 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12273 HOST_WIDE_INT val0
= 0, val1
= 0;
12277 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12279 reg0
= XEXP (XEXP (a
, 0), 0);
12280 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12283 reg0
= XEXP (a
, 0);
12285 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12287 reg1
= XEXP (XEXP (b
, 0), 0);
12288 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12291 reg1
= XEXP (b
, 0);
12293 /* Don't accept any offset that will require multiple
12294 instructions to handle, since this would cause the
12295 arith_adjacentmem pattern to output an overlong sequence. */
12296 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12299 /* Don't allow an eliminable register: register elimination can make
12300 the offset too large. */
12301 if (arm_eliminable_register (reg0
))
12304 val_diff
= val1
- val0
;
12308 /* If the target has load delay slots, then there's no benefit
12309 to using an ldm instruction unless the offset is zero and
12310 we are optimizing for size. */
12311 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12312 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12313 && (val_diff
== 4 || val_diff
== -4));
12316 return ((REGNO (reg0
) == REGNO (reg1
))
12317 && (val_diff
== 4 || val_diff
== -4));
12323 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12324 for load operations, false for store operations. CONSECUTIVE is true
12325 if the register numbers in the operation must be consecutive in the register
12326 bank. RETURN_PC is true if value is to be loaded in PC.
12327 The pattern we are trying to match for load is:
12328 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12329 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12332 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12335 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12336 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12337 3. If consecutive is TRUE, then for kth register being loaded,
12338 REGNO (R_dk) = REGNO (R_d0) + k.
12339 The pattern for store is similar. */
12341 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12342 bool consecutive
, bool return_pc
)
12344 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12345 rtx reg
, mem
, addr
;
12347 unsigned first_regno
;
12348 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12350 bool addr_reg_in_reglist
= false;
12351 bool update
= false;
12356 /* If not in SImode, then registers must be consecutive
12357 (e.g., VLDM instructions for DFmode). */
12358 gcc_assert ((mode
== SImode
) || consecutive
);
12359 /* Setting return_pc for stores is illegal. */
12360 gcc_assert (!return_pc
|| load
);
12362 /* Set up the increments and the regs per val based on the mode. */
12363 reg_increment
= GET_MODE_SIZE (mode
);
12364 regs_per_val
= reg_increment
/ 4;
12365 offset_adj
= return_pc
? 1 : 0;
12368 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12369 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12372 /* Check if this is a write-back. */
12373 elt
= XVECEXP (op
, 0, offset_adj
);
12374 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12380 /* The offset adjustment must be the number of registers being
12381 popped times the size of a single register. */
12382 if (!REG_P (SET_DEST (elt
))
12383 || !REG_P (XEXP (SET_SRC (elt
), 0))
12384 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12385 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12386 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12387 ((count
- 1 - offset_adj
) * reg_increment
))
12391 i
= i
+ offset_adj
;
12392 base
= base
+ offset_adj
;
12393 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12394 success depends on the type: VLDM can do just one reg,
12395 LDM must do at least two. */
12396 if ((count
<= i
) && (mode
== SImode
))
12399 elt
= XVECEXP (op
, 0, i
- 1);
12400 if (GET_CODE (elt
) != SET
)
12405 reg
= SET_DEST (elt
);
12406 mem
= SET_SRC (elt
);
12410 reg
= SET_SRC (elt
);
12411 mem
= SET_DEST (elt
);
12414 if (!REG_P (reg
) || !MEM_P (mem
))
12417 regno
= REGNO (reg
);
12418 first_regno
= regno
;
12419 addr
= XEXP (mem
, 0);
12420 if (GET_CODE (addr
) == PLUS
)
12422 if (!CONST_INT_P (XEXP (addr
, 1)))
12425 offset
= INTVAL (XEXP (addr
, 1));
12426 addr
= XEXP (addr
, 0);
12432 /* Don't allow SP to be loaded unless it is also the base register. It
12433 guarantees that SP is reset correctly when an LDM instruction
12434 is interrupted. Otherwise, we might end up with a corrupt stack. */
12435 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12438 for (; i
< count
; i
++)
12440 elt
= XVECEXP (op
, 0, i
);
12441 if (GET_CODE (elt
) != SET
)
12446 reg
= SET_DEST (elt
);
12447 mem
= SET_SRC (elt
);
12451 reg
= SET_SRC (elt
);
12452 mem
= SET_DEST (elt
);
12456 || GET_MODE (reg
) != mode
12457 || REGNO (reg
) <= regno
12460 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12461 /* Don't allow SP to be loaded unless it is also the base register. It
12462 guarantees that SP is reset correctly when an LDM instruction
12463 is interrupted. Otherwise, we might end up with a corrupt stack. */
12464 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12466 || GET_MODE (mem
) != mode
12467 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12468 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12469 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
12470 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
12471 offset
+ (i
- base
) * reg_increment
))
12472 && (!REG_P (XEXP (mem
, 0))
12473 || offset
+ (i
- base
) * reg_increment
!= 0)))
12476 regno
= REGNO (reg
);
12477 if (regno
== REGNO (addr
))
12478 addr_reg_in_reglist
= true;
12483 if (update
&& addr_reg_in_reglist
)
12486 /* For Thumb-1, address register is always modified - either by write-back
12487 or by explicit load. If the pattern does not describe an update,
12488 then the address register must be in the list of loaded registers. */
12490 return update
|| addr_reg_in_reglist
;
12496 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12497 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12498 instruction. ADD_OFFSET is nonzero if the base address register needs
12499 to be modified with an add instruction before we can use it. */
12502 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
12503 int nops
, HOST_WIDE_INT add_offset
)
12505 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12506 if the offset isn't small enough. The reason 2 ldrs are faster
12507 is because these ARMs are able to do more than one cache access
12508 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12509 whilst the ARM8 has a double bandwidth cache. This means that
12510 these cores can do both an instruction fetch and a data fetch in
12511 a single cycle, so the trick of calculating the address into a
12512 scratch register (one of the result regs) and then doing a load
12513 multiple actually becomes slower (and no smaller in code size).
12514 That is the transformation
12516 ldr rd1, [rbase + offset]
12517 ldr rd2, [rbase + offset + 4]
12521 add rd1, rbase, offset
12522 ldmia rd1, {rd1, rd2}
12524 produces worse code -- '3 cycles + any stalls on rd2' instead of
12525 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12526 access per cycle, the first sequence could never complete in less
12527 than 6 cycles, whereas the ldm sequence would only take 5 and
12528 would make better use of sequential accesses if not hitting the
12531 We cheat here and test 'arm_ld_sched' which we currently know to
12532 only be true for the ARM8, ARM9 and StrongARM. If this ever
12533 changes, then the test below needs to be reworked. */
12534 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
12537 /* XScale has load-store double instructions, but they have stricter
12538 alignment requirements than load-store multiple, so we cannot
12541 For XScale ldm requires 2 + NREGS cycles to complete and blocks
12542 the pipeline until completion.
12550 An ldr instruction takes 1-3 cycles, but does not block the
12559 Best case ldr will always win. However, the more ldr instructions
12560 we issue, the less likely we are to be able to schedule them well.
12561 Using ldr instructions also increases code size.
12563 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
12564 for counts of 3 or 4 regs. */
12565 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
12570 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
12571 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
12572 an array ORDER which describes the sequence to use when accessing the
12573 offsets that produces an ascending order. In this sequence, each
12574 offset must be larger by exactly 4 than the previous one. ORDER[0]
12575 must have been filled in with the lowest offset by the caller.
12576 If UNSORTED_REGS is nonnull, it is an array of register numbers that
12577 we use to verify that ORDER produces an ascending order of registers.
12578 Return true if it was possible to construct such an order, false if
12582 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
12583 int *unsorted_regs
)
12586 for (i
= 1; i
< nops
; i
++)
12590 order
[i
] = order
[i
- 1];
12591 for (j
= 0; j
< nops
; j
++)
12592 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
12594 /* We must find exactly one offset that is higher than the
12595 previous one by 4. */
12596 if (order
[i
] != order
[i
- 1])
12600 if (order
[i
] == order
[i
- 1])
12602 /* The register numbers must be ascending. */
12603 if (unsorted_regs
!= NULL
12604 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
12610 /* Used to determine in a peephole whether a sequence of load
12611 instructions can be changed into a load-multiple instruction.
12612 NOPS is the number of separate load instructions we are examining. The
12613 first NOPS entries in OPERANDS are the destination registers, the
12614 next NOPS entries are memory operands. If this function is
12615 successful, *BASE is set to the common base register of the memory
12616 accesses; *LOAD_OFFSET is set to the first memory location's offset
12617 from that base register.
12618 REGS is an array filled in with the destination register numbers.
12619 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
12620 insn numbers to an ascending order of stores. If CHECK_REGS is true,
12621 the sequence of registers in REGS matches the loads from ascending memory
12622 locations, and the function verifies that the register numbers are
12623 themselves ascending. If CHECK_REGS is false, the register numbers
12624 are stored in the order they are found in the operands. */
12626 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
12627 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
12629 int unsorted_regs
[MAX_LDM_STM_OPS
];
12630 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
12631 int order
[MAX_LDM_STM_OPS
];
12632 rtx base_reg_rtx
= NULL
;
12636 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12637 easily extended if required. */
12638 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
12640 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
12642 /* Loop over the operands and check that the memory references are
12643 suitable (i.e. immediate offsets from the same base register). At
12644 the same time, extract the target register, and the memory
12646 for (i
= 0; i
< nops
; i
++)
12651 /* Convert a subreg of a mem into the mem itself. */
12652 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
12653 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
12655 gcc_assert (MEM_P (operands
[nops
+ i
]));
12657 /* Don't reorder volatile memory references; it doesn't seem worth
12658 looking for the case where the order is ok anyway. */
12659 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
12662 offset
= const0_rtx
;
12664 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
12665 || (GET_CODE (reg
) == SUBREG
12666 && REG_P (reg
= SUBREG_REG (reg
))))
12667 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
12668 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
12669 || (GET_CODE (reg
) == SUBREG
12670 && REG_P (reg
= SUBREG_REG (reg
))))
12671 && (CONST_INT_P (offset
12672 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
12676 base_reg
= REGNO (reg
);
12677 base_reg_rtx
= reg
;
12678 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
12681 else if (base_reg
!= (int) REGNO (reg
))
12682 /* Not addressed from the same base register. */
12685 unsorted_regs
[i
] = (REG_P (operands
[i
])
12686 ? REGNO (operands
[i
])
12687 : REGNO (SUBREG_REG (operands
[i
])));
12689 /* If it isn't an integer register, or if it overwrites the
12690 base register but isn't the last insn in the list, then
12691 we can't do this. */
12692 if (unsorted_regs
[i
] < 0
12693 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
12694 || unsorted_regs
[i
] > 14
12695 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
12698 /* Don't allow SP to be loaded unless it is also the base
12699 register. It guarantees that SP is reset correctly when
12700 an LDM instruction is interrupted. Otherwise, we might
12701 end up with a corrupt stack. */
12702 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
12705 unsorted_offsets
[i
] = INTVAL (offset
);
12706 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
12710 /* Not a suitable memory address. */
12714 /* All the useful information has now been extracted from the
12715 operands into unsorted_regs and unsorted_offsets; additionally,
12716 order[0] has been set to the lowest offset in the list. Sort
12717 the offsets into order, verifying that they are adjacent, and
12718 check that the register numbers are ascending. */
12719 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
12720 check_regs
? unsorted_regs
: NULL
))
12724 memcpy (saved_order
, order
, sizeof order
);
12730 for (i
= 0; i
< nops
; i
++)
12731 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
12733 *load_offset
= unsorted_offsets
[order
[0]];
12737 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
12740 if (unsorted_offsets
[order
[0]] == 0)
12741 ldm_case
= 1; /* ldmia */
12742 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
12743 ldm_case
= 2; /* ldmib */
12744 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
12745 ldm_case
= 3; /* ldmda */
12746 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
12747 ldm_case
= 4; /* ldmdb */
12748 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
12749 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
12754 if (!multiple_operation_profitable_p (false, nops
,
12756 ? unsorted_offsets
[order
[0]] : 0))
12762 /* Used to determine in a peephole whether a sequence of store instructions can
12763 be changed into a store-multiple instruction.
12764 NOPS is the number of separate store instructions we are examining.
12765 NOPS_TOTAL is the total number of instructions recognized by the peephole
12767 The first NOPS entries in OPERANDS are the source registers, the next
12768 NOPS entries are memory operands. If this function is successful, *BASE is
12769 set to the common base register of the memory accesses; *LOAD_OFFSET is set
12770 to the first memory location's offset from that base register. REGS is an
12771 array filled in with the source register numbers, REG_RTXS (if nonnull) is
12772 likewise filled with the corresponding rtx's.
12773 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
12774 numbers to an ascending order of stores.
12775 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
12776 from ascending memory locations, and the function verifies that the register
12777 numbers are themselves ascending. If CHECK_REGS is false, the register
12778 numbers are stored in the order they are found in the operands. */
12780 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
12781 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
12782 HOST_WIDE_INT
*load_offset
, bool check_regs
)
12784 int unsorted_regs
[MAX_LDM_STM_OPS
];
12785 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
12786 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
12787 int order
[MAX_LDM_STM_OPS
];
12789 rtx base_reg_rtx
= NULL
;
12792 /* Write back of base register is currently only supported for Thumb 1. */
12793 int base_writeback
= TARGET_THUMB1
;
12795 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
12796 easily extended if required. */
12797 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
12799 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
12801 /* Loop over the operands and check that the memory references are
12802 suitable (i.e. immediate offsets from the same base register). At
12803 the same time, extract the target register, and the memory
12805 for (i
= 0; i
< nops
; i
++)
12810 /* Convert a subreg of a mem into the mem itself. */
12811 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
12812 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
12814 gcc_assert (MEM_P (operands
[nops
+ i
]));
12816 /* Don't reorder volatile memory references; it doesn't seem worth
12817 looking for the case where the order is ok anyway. */
12818 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
12821 offset
= const0_rtx
;
12823 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
12824 || (GET_CODE (reg
) == SUBREG
12825 && REG_P (reg
= SUBREG_REG (reg
))))
12826 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
12827 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
12828 || (GET_CODE (reg
) == SUBREG
12829 && REG_P (reg
= SUBREG_REG (reg
))))
12830 && (CONST_INT_P (offset
12831 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
12833 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
12834 ? operands
[i
] : SUBREG_REG (operands
[i
]));
12835 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
12839 base_reg
= REGNO (reg
);
12840 base_reg_rtx
= reg
;
12841 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
12844 else if (base_reg
!= (int) REGNO (reg
))
12845 /* Not addressed from the same base register. */
12848 /* If it isn't an integer register, then we can't do this. */
12849 if (unsorted_regs
[i
] < 0
12850 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
12851 /* The effects are unpredictable if the base register is
12852 both updated and stored. */
12853 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
12854 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
12855 || unsorted_regs
[i
] > 14)
12858 unsorted_offsets
[i
] = INTVAL (offset
);
12859 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
12863 /* Not a suitable memory address. */
12867 /* All the useful information has now been extracted from the
12868 operands into unsorted_regs and unsorted_offsets; additionally,
12869 order[0] has been set to the lowest offset in the list. Sort
12870 the offsets into order, verifying that they are adjacent, and
12871 check that the register numbers are ascending. */
12872 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
12873 check_regs
? unsorted_regs
: NULL
))
12877 memcpy (saved_order
, order
, sizeof order
);
12883 for (i
= 0; i
< nops
; i
++)
12885 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
12887 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
12890 *load_offset
= unsorted_offsets
[order
[0]];
12894 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
12897 if (unsorted_offsets
[order
[0]] == 0)
12898 stm_case
= 1; /* stmia */
12899 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
12900 stm_case
= 2; /* stmib */
12901 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
12902 stm_case
= 3; /* stmda */
12903 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
12904 stm_case
= 4; /* stmdb */
12908 if (!multiple_operation_profitable_p (false, nops
, 0))
12914 /* Routines for use in generating RTL. */
12916 /* Generate a load-multiple instruction. COUNT is the number of loads in
12917 the instruction; REGS and MEMS are arrays containing the operands.
12918 BASEREG is the base register to be used in addressing the memory operands.
12919 WBACK_OFFSET is nonzero if the instruction should update the base
12923 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
12924 HOST_WIDE_INT wback_offset
)
12929 if (!multiple_operation_profitable_p (false, count
, 0))
12935 for (i
= 0; i
< count
; i
++)
12936 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
12938 if (wback_offset
!= 0)
12939 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
12941 seq
= get_insns ();
12947 result
= gen_rtx_PARALLEL (VOIDmode
,
12948 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
12949 if (wback_offset
!= 0)
12951 XVECEXP (result
, 0, 0)
12952 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
12957 for (j
= 0; i
< count
; i
++, j
++)
12958 XVECEXP (result
, 0, i
)
12959 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
12964 /* Generate a store-multiple instruction. COUNT is the number of stores in
12965 the instruction; REGS and MEMS are arrays containing the operands.
12966 BASEREG is the base register to be used in addressing the memory operands.
12967 WBACK_OFFSET is nonzero if the instruction should update the base
12971 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
12972 HOST_WIDE_INT wback_offset
)
12977 if (GET_CODE (basereg
) == PLUS
)
12978 basereg
= XEXP (basereg
, 0);
12980 if (!multiple_operation_profitable_p (false, count
, 0))
12986 for (i
= 0; i
< count
; i
++)
12987 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
12989 if (wback_offset
!= 0)
12990 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
12992 seq
= get_insns ();
12998 result
= gen_rtx_PARALLEL (VOIDmode
,
12999 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13000 if (wback_offset
!= 0)
13002 XVECEXP (result
, 0, 0)
13003 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13008 for (j
= 0; i
< count
; i
++, j
++)
13009 XVECEXP (result
, 0, i
)
13010 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13015 /* Generate either a load-multiple or a store-multiple instruction. This
13016 function can be used in situations where we can start with a single MEM
13017 rtx and adjust its address upwards.
13018 COUNT is the number of operations in the instruction, not counting a
13019 possible update of the base register. REGS is an array containing the
13021 BASEREG is the base register to be used in addressing the memory operands,
13022 which are constructed from BASEMEM.
13023 WRITE_BACK specifies whether the generated instruction should include an
13024 update of the base register.
13025 OFFSETP is used to pass an offset to and from this function; this offset
13026 is not used when constructing the address (instead BASEMEM should have an
13027 appropriate offset in its address), it is used only for setting
13028 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13031 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13032 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13034 rtx mems
[MAX_LDM_STM_OPS
];
13035 HOST_WIDE_INT offset
= *offsetp
;
13038 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13040 if (GET_CODE (basereg
) == PLUS
)
13041 basereg
= XEXP (basereg
, 0);
13043 for (i
= 0; i
< count
; i
++)
13045 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13046 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13054 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13055 write_back
? 4 * count
: 0);
13057 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13058 write_back
? 4 * count
: 0);
13062 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13063 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13065 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13070 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13071 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13073 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13077 /* Called from a peephole2 expander to turn a sequence of loads into an
13078 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13079 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13080 is true if we can reorder the registers because they are used commutatively
13082 Returns true iff we could generate a new instruction. */
13085 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13087 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13088 rtx mems
[MAX_LDM_STM_OPS
];
13089 int i
, j
, base_reg
;
13091 HOST_WIDE_INT offset
;
13092 int write_back
= FALSE
;
13096 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13097 &base_reg
, &offset
, !sort_regs
);
13103 for (i
= 0; i
< nops
- 1; i
++)
13104 for (j
= i
+ 1; j
< nops
; j
++)
13105 if (regs
[i
] > regs
[j
])
13111 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13115 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13116 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13122 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13123 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13125 if (!TARGET_THUMB1
)
13127 base_reg
= regs
[0];
13128 base_reg_rtx
= newbase
;
13132 for (i
= 0; i
< nops
; i
++)
13134 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13135 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13138 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13139 write_back
? offset
+ i
* 4 : 0));
13143 /* Called from a peephole2 expander to turn a sequence of stores into an
13144 STM instruction. OPERANDS are the operands found by the peephole matcher;
13145 NOPS indicates how many separate stores we are trying to combine.
13146 Returns true iff we could generate a new instruction. */
13149 gen_stm_seq (rtx
*operands
, int nops
)
13152 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13153 rtx mems
[MAX_LDM_STM_OPS
];
13156 HOST_WIDE_INT offset
;
13157 int write_back
= FALSE
;
13160 bool base_reg_dies
;
13162 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13163 mem_order
, &base_reg
, &offset
, true);
13168 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13170 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13173 gcc_assert (base_reg_dies
);
13179 gcc_assert (base_reg_dies
);
13180 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13184 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13186 for (i
= 0; i
< nops
; i
++)
13188 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13189 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13192 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13193 write_back
? offset
+ i
* 4 : 0));
13197 /* Called from a peephole2 expander to turn a sequence of stores that are
13198 preceded by constant loads into an STM instruction. OPERANDS are the
13199 operands found by the peephole matcher; NOPS indicates how many
13200 separate stores we are trying to combine; there are 2 * NOPS
13201 instructions in the peephole.
13202 Returns true iff we could generate a new instruction. */
13205 gen_const_stm_seq (rtx
*operands
, int nops
)
13207 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13208 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13209 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13210 rtx mems
[MAX_LDM_STM_OPS
];
13213 HOST_WIDE_INT offset
;
13214 int write_back
= FALSE
;
13217 bool base_reg_dies
;
13219 HARD_REG_SET allocated
;
13221 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13222 mem_order
, &base_reg
, &offset
, false);
13227 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13229 /* If the same register is used more than once, try to find a free
13231 CLEAR_HARD_REG_SET (allocated
);
13232 for (i
= 0; i
< nops
; i
++)
13234 for (j
= i
+ 1; j
< nops
; j
++)
13235 if (regs
[i
] == regs
[j
])
13237 rtx t
= peep2_find_free_register (0, nops
* 2,
13238 TARGET_THUMB1
? "l" : "r",
13239 SImode
, &allocated
);
13243 regs
[i
] = REGNO (t
);
13247 /* Compute an ordering that maps the register numbers to an ascending
13250 for (i
= 0; i
< nops
; i
++)
13251 if (regs
[i
] < regs
[reg_order
[0]])
13254 for (i
= 1; i
< nops
; i
++)
13256 int this_order
= reg_order
[i
- 1];
13257 for (j
= 0; j
< nops
; j
++)
13258 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13259 && (this_order
== reg_order
[i
- 1]
13260 || regs
[j
] < regs
[this_order
]))
13262 reg_order
[i
] = this_order
;
13265 /* Ensure that registers that must be live after the instruction end
13266 up with the correct value. */
13267 for (i
= 0; i
< nops
; i
++)
13269 int this_order
= reg_order
[i
];
13270 if ((this_order
!= mem_order
[i
]
13271 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13272 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13276 /* Load the constants. */
13277 for (i
= 0; i
< nops
; i
++)
13279 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13280 sorted_regs
[i
] = regs
[reg_order
[i
]];
13281 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13284 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13286 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13289 gcc_assert (base_reg_dies
);
13295 gcc_assert (base_reg_dies
);
13296 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13300 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13302 for (i
= 0; i
< nops
; i
++)
13304 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13305 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13308 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13309 write_back
? offset
+ i
* 4 : 0));
13313 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13314 unaligned copies on processors which support unaligned semantics for those
13315 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13316 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13317 An interleave factor of 1 (the minimum) will perform no interleaving.
13318 Load/store multiple are used for aligned addresses where possible. */
13321 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13322 HOST_WIDE_INT length
,
13323 unsigned int interleave_factor
)
13325 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13326 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13327 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13328 HOST_WIDE_INT i
, j
;
13329 HOST_WIDE_INT remaining
= length
, words
;
13330 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13332 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13333 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13334 HOST_WIDE_INT srcoffset
, dstoffset
;
13335 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13338 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13340 /* Use hard registers if we have aligned source or destination so we can use
13341 load/store multiple with contiguous registers. */
13342 if (dst_aligned
|| src_aligned
)
13343 for (i
= 0; i
< interleave_factor
; i
++)
13344 regs
[i
] = gen_rtx_REG (SImode
, i
);
13346 for (i
= 0; i
< interleave_factor
; i
++)
13347 regs
[i
] = gen_reg_rtx (SImode
);
13349 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13350 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13352 srcoffset
= dstoffset
= 0;
13354 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13355 For copying the last bytes we want to subtract this offset again. */
13356 src_autoinc
= dst_autoinc
= 0;
13358 for (i
= 0; i
< interleave_factor
; i
++)
13361 /* Copy BLOCK_SIZE_BYTES chunks. */
13363 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13366 if (src_aligned
&& interleave_factor
> 1)
13368 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13369 TRUE
, srcbase
, &srcoffset
));
13370 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13374 for (j
= 0; j
< interleave_factor
; j
++)
13376 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13378 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13379 srcoffset
+ j
* UNITS_PER_WORD
);
13380 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13382 srcoffset
+= block_size_bytes
;
13386 if (dst_aligned
&& interleave_factor
> 1)
13388 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13389 TRUE
, dstbase
, &dstoffset
));
13390 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13394 for (j
= 0; j
< interleave_factor
; j
++)
13396 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13398 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13399 dstoffset
+ j
* UNITS_PER_WORD
);
13400 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13402 dstoffset
+= block_size_bytes
;
13405 remaining
-= block_size_bytes
;
13408 /* Copy any whole words left (note these aren't interleaved with any
13409 subsequent halfword/byte load/stores in the interests of simplicity). */
13411 words
= remaining
/ UNITS_PER_WORD
;
13413 gcc_assert (words
< interleave_factor
);
13415 if (src_aligned
&& words
> 1)
13417 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13419 src_autoinc
+= UNITS_PER_WORD
* words
;
13423 for (j
= 0; j
< words
; j
++)
13425 addr
= plus_constant (Pmode
, src
,
13426 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13427 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13428 srcoffset
+ j
* UNITS_PER_WORD
);
13430 emit_move_insn (regs
[j
], mem
);
13432 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13434 srcoffset
+= words
* UNITS_PER_WORD
;
13437 if (dst_aligned
&& words
> 1)
13439 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
13441 dst_autoinc
+= words
* UNITS_PER_WORD
;
13445 for (j
= 0; j
< words
; j
++)
13447 addr
= plus_constant (Pmode
, dst
,
13448 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
13449 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13450 dstoffset
+ j
* UNITS_PER_WORD
);
13452 emit_move_insn (mem
, regs
[j
]);
13454 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13456 dstoffset
+= words
* UNITS_PER_WORD
;
13459 remaining
-= words
* UNITS_PER_WORD
;
13461 gcc_assert (remaining
< 4);
13463 /* Copy a halfword if necessary. */
13465 if (remaining
>= 2)
13467 halfword_tmp
= gen_reg_rtx (SImode
);
13469 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13470 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
13471 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
13473 /* Either write out immediately, or delay until we've loaded the last
13474 byte, depending on interleave factor. */
13475 if (interleave_factor
== 1)
13477 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13478 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13479 emit_insn (gen_unaligned_storehi (mem
,
13480 gen_lowpart (HImode
, halfword_tmp
)));
13481 halfword_tmp
= NULL
;
13489 gcc_assert (remaining
< 2);
13491 /* Copy last byte. */
13493 if ((remaining
& 1) != 0)
13495 byte_tmp
= gen_reg_rtx (SImode
);
13497 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13498 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
13499 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
13501 if (interleave_factor
== 1)
13503 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13504 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13505 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13514 /* Store last halfword if we haven't done so already. */
13518 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13519 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13520 emit_insn (gen_unaligned_storehi (mem
,
13521 gen_lowpart (HImode
, halfword_tmp
)));
13525 /* Likewise for last byte. */
13529 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13530 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13531 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13535 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
13538 /* From mips_adjust_block_mem:
13540 Helper function for doing a loop-based block operation on memory
13541 reference MEM. Each iteration of the loop will operate on LENGTH
13544 Create a new base register for use within the loop and point it to
13545 the start of MEM. Create a new memory reference that uses this
13546 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
13549 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
13552 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
13554 /* Although the new mem does not refer to a known location,
13555 it does keep up to LENGTH bytes of alignment. */
13556 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
13557 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
13560 /* From mips_block_move_loop:
13562 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
13563 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
13564 the memory regions do not overlap. */
13567 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
13568 unsigned int interleave_factor
,
13569 HOST_WIDE_INT bytes_per_iter
)
13571 rtx src_reg
, dest_reg
, final_src
, test
;
13572 HOST_WIDE_INT leftover
;
13574 leftover
= length
% bytes_per_iter
;
13575 length
-= leftover
;
13577 /* Create registers and memory references for use within the loop. */
13578 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
13579 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
13581 /* Calculate the value that SRC_REG should have after the last iteration of
13583 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
13584 0, 0, OPTAB_WIDEN
);
13586 /* Emit the start of the loop. */
13587 rtx_code_label
*label
= gen_label_rtx ();
13588 emit_label (label
);
13590 /* Emit the loop body. */
13591 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
13592 interleave_factor
);
13594 /* Move on to the next block. */
13595 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
13596 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
13598 /* Emit the loop condition. */
13599 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
13600 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
13602 /* Mop up any left-over bytes. */
13604 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
13607 /* Emit a block move when either the source or destination is unaligned (not
13608 aligned to a four-byte boundary). This may need further tuning depending on
13609 core type, optimize_size setting, etc. */
13612 arm_movmemqi_unaligned (rtx
*operands
)
13614 HOST_WIDE_INT length
= INTVAL (operands
[2]);
13618 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
13619 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
13620 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
13621 size of code if optimizing for size. We'll use ldm/stm if src_aligned
13622 or dst_aligned though: allow more interleaving in those cases since the
13623 resulting code can be smaller. */
13624 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
13625 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
13628 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
13629 interleave_factor
, bytes_per_iter
);
13631 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
13632 interleave_factor
);
13636 /* Note that the loop created by arm_block_move_unaligned_loop may be
13637 subject to loop unrolling, which makes tuning this condition a little
13640 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
13642 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
13649 arm_gen_movmemqi (rtx
*operands
)
13651 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
13652 HOST_WIDE_INT srcoffset
, dstoffset
;
13654 rtx src
, dst
, srcbase
, dstbase
;
13655 rtx part_bytes_reg
= NULL
;
13658 if (!CONST_INT_P (operands
[2])
13659 || !CONST_INT_P (operands
[3])
13660 || INTVAL (operands
[2]) > 64)
13663 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
13664 return arm_movmemqi_unaligned (operands
);
13666 if (INTVAL (operands
[3]) & 3)
13669 dstbase
= operands
[0];
13670 srcbase
= operands
[1];
13672 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
13673 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
13675 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
13676 out_words_to_go
= INTVAL (operands
[2]) / 4;
13677 last_bytes
= INTVAL (operands
[2]) & 3;
13678 dstoffset
= srcoffset
= 0;
13680 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
13681 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
13683 for (i
= 0; in_words_to_go
>= 2; i
+=4)
13685 if (in_words_to_go
> 4)
13686 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
13687 TRUE
, srcbase
, &srcoffset
));
13689 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
13690 src
, FALSE
, srcbase
,
13693 if (out_words_to_go
)
13695 if (out_words_to_go
> 4)
13696 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
13697 TRUE
, dstbase
, &dstoffset
));
13698 else if (out_words_to_go
!= 1)
13699 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
13700 out_words_to_go
, dst
,
13703 dstbase
, &dstoffset
));
13706 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
13707 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
13708 if (last_bytes
!= 0)
13710 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
13716 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
13717 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
13720 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
13721 if (out_words_to_go
)
13725 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
13726 sreg
= copy_to_reg (mem
);
13728 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
13729 emit_move_insn (mem
, sreg
);
13732 gcc_assert (!in_words_to_go
); /* Sanity check */
13735 if (in_words_to_go
)
13737 gcc_assert (in_words_to_go
> 0);
13739 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
13740 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
13743 gcc_assert (!last_bytes
|| part_bytes_reg
);
13745 if (BYTES_BIG_ENDIAN
&& last_bytes
)
13747 rtx tmp
= gen_reg_rtx (SImode
);
13749 /* The bytes we want are in the top end of the word. */
13750 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
13751 GEN_INT (8 * (4 - last_bytes
))));
13752 part_bytes_reg
= tmp
;
13756 mem
= adjust_automodify_address (dstbase
, QImode
,
13757 plus_constant (Pmode
, dst
,
13759 dstoffset
+ last_bytes
- 1);
13760 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
13764 tmp
= gen_reg_rtx (SImode
);
13765 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
13766 part_bytes_reg
= tmp
;
13773 if (last_bytes
> 1)
13775 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
13776 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
13780 rtx tmp
= gen_reg_rtx (SImode
);
13781 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
13782 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
13783 part_bytes_reg
= tmp
;
13790 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
13791 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
13798 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
13801 next_consecutive_mem (rtx mem
)
13803 machine_mode mode
= GET_MODE (mem
);
13804 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
13805 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
13807 return adjust_automodify_address (mem
, mode
, addr
, offset
);
13810 /* Copy using LDRD/STRD instructions whenever possible.
13811 Returns true upon success. */
13813 gen_movmem_ldrd_strd (rtx
*operands
)
13815 unsigned HOST_WIDE_INT len
;
13816 HOST_WIDE_INT align
;
13817 rtx src
, dst
, base
;
13819 bool src_aligned
, dst_aligned
;
13820 bool src_volatile
, dst_volatile
;
13822 gcc_assert (CONST_INT_P (operands
[2]));
13823 gcc_assert (CONST_INT_P (operands
[3]));
13825 len
= UINTVAL (operands
[2]);
13829 /* Maximum alignment we can assume for both src and dst buffers. */
13830 align
= INTVAL (operands
[3]);
13832 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
13835 /* Place src and dst addresses in registers
13836 and update the corresponding mem rtx. */
13838 dst_volatile
= MEM_VOLATILE_P (dst
);
13839 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
13840 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
13841 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
13844 src_volatile
= MEM_VOLATILE_P (src
);
13845 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
13846 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
13847 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
13849 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
13852 if (src_volatile
|| dst_volatile
)
13855 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
13856 if (!(dst_aligned
|| src_aligned
))
13857 return arm_gen_movmemqi (operands
);
13859 /* If the either src or dst is unaligned we'll be accessing it as pairs
13860 of unaligned SImode accesses. Otherwise we can generate DImode
13861 ldrd/strd instructions. */
13862 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
13863 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
13868 reg0
= gen_reg_rtx (DImode
);
13869 rtx low_reg
= NULL_RTX
;
13870 rtx hi_reg
= NULL_RTX
;
13872 if (!src_aligned
|| !dst_aligned
)
13874 low_reg
= gen_lowpart (SImode
, reg0
);
13875 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
13878 emit_move_insn (reg0
, src
);
13881 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
13882 src
= next_consecutive_mem (src
);
13883 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
13887 emit_move_insn (dst
, reg0
);
13890 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
13891 dst
= next_consecutive_mem (dst
);
13892 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
13895 src
= next_consecutive_mem (src
);
13896 dst
= next_consecutive_mem (dst
);
13899 gcc_assert (len
< 8);
13902 /* More than a word but less than a double-word to copy. Copy a word. */
13903 reg0
= gen_reg_rtx (SImode
);
13904 src
= adjust_address (src
, SImode
, 0);
13905 dst
= adjust_address (dst
, SImode
, 0);
13907 emit_move_insn (reg0
, src
);
13909 emit_insn (gen_unaligned_loadsi (reg0
, src
));
13912 emit_move_insn (dst
, reg0
);
13914 emit_insn (gen_unaligned_storesi (dst
, reg0
));
13916 src
= next_consecutive_mem (src
);
13917 dst
= next_consecutive_mem (dst
);
13924 /* Copy the remaining bytes. */
13927 dst
= adjust_address (dst
, HImode
, 0);
13928 src
= adjust_address (src
, HImode
, 0);
13929 reg0
= gen_reg_rtx (SImode
);
13931 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
13933 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
13936 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
13938 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
13940 src
= next_consecutive_mem (src
);
13941 dst
= next_consecutive_mem (dst
);
13946 dst
= adjust_address (dst
, QImode
, 0);
13947 src
= adjust_address (src
, QImode
, 0);
13948 reg0
= gen_reg_rtx (QImode
);
13949 emit_move_insn (reg0
, src
);
13950 emit_move_insn (dst
, reg0
);
13954 /* Select a dominance comparison mode if possible for a test of the general
13955 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
13956 COND_OR == DOM_CC_X_AND_Y => (X && Y)
13957 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
13958 COND_OR == DOM_CC_X_OR_Y => (X || Y)
13959 In all cases OP will be either EQ or NE, but we don't need to know which
13960 here. If we are unable to support a dominance comparison we return
13961 CC mode. This will then fail to match for the RTL expressions that
13962 generate this call. */
13964 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
13966 enum rtx_code cond1
, cond2
;
13969 /* Currently we will probably get the wrong result if the individual
13970 comparisons are not simple. This also ensures that it is safe to
13971 reverse a comparison if necessary. */
13972 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
13974 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
13978 /* The if_then_else variant of this tests the second condition if the
13979 first passes, but is true if the first fails. Reverse the first
13980 condition to get a true "inclusive-or" expression. */
13981 if (cond_or
== DOM_CC_NX_OR_Y
)
13982 cond1
= reverse_condition (cond1
);
13984 /* If the comparisons are not equal, and one doesn't dominate the other,
13985 then we can't do this. */
13987 && !comparison_dominates_p (cond1
, cond2
)
13988 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
13992 std::swap (cond1
, cond2
);
13997 if (cond_or
== DOM_CC_X_AND_Y
)
14002 case EQ
: return CC_DEQmode
;
14003 case LE
: return CC_DLEmode
;
14004 case LEU
: return CC_DLEUmode
;
14005 case GE
: return CC_DGEmode
;
14006 case GEU
: return CC_DGEUmode
;
14007 default: gcc_unreachable ();
14011 if (cond_or
== DOM_CC_X_AND_Y
)
14023 gcc_unreachable ();
14027 if (cond_or
== DOM_CC_X_AND_Y
)
14039 gcc_unreachable ();
14043 if (cond_or
== DOM_CC_X_AND_Y
)
14044 return CC_DLTUmode
;
14049 return CC_DLTUmode
;
14051 return CC_DLEUmode
;
14055 gcc_unreachable ();
14059 if (cond_or
== DOM_CC_X_AND_Y
)
14060 return CC_DGTUmode
;
14065 return CC_DGTUmode
;
14067 return CC_DGEUmode
;
14071 gcc_unreachable ();
14074 /* The remaining cases only occur when both comparisons are the
14077 gcc_assert (cond1
== cond2
);
14081 gcc_assert (cond1
== cond2
);
14085 gcc_assert (cond1
== cond2
);
14089 gcc_assert (cond1
== cond2
);
14090 return CC_DLEUmode
;
14093 gcc_assert (cond1
== cond2
);
14094 return CC_DGEUmode
;
14097 gcc_unreachable ();
14102 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14104 /* All floating point compares return CCFP if it is an equality
14105 comparison, and CCFPE otherwise. */
14106 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14129 gcc_unreachable ();
14133 /* A compare with a shifted operand. Because of canonicalization, the
14134 comparison will have to be swapped when we emit the assembler. */
14135 if (GET_MODE (y
) == SImode
14136 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14137 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14138 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14139 || GET_CODE (x
) == ROTATERT
))
14142 /* This operation is performed swapped, but since we only rely on the Z
14143 flag we don't need an additional mode. */
14144 if (GET_MODE (y
) == SImode
14145 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14146 && GET_CODE (x
) == NEG
14147 && (op
== EQ
|| op
== NE
))
14150 /* This is a special case that is used by combine to allow a
14151 comparison of a shifted byte load to be split into a zero-extend
14152 followed by a comparison of the shifted integer (only valid for
14153 equalities and unsigned inequalities). */
14154 if (GET_MODE (x
) == SImode
14155 && GET_CODE (x
) == ASHIFT
14156 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14157 && GET_CODE (XEXP (x
, 0)) == SUBREG
14158 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14159 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14160 && (op
== EQ
|| op
== NE
14161 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14162 && CONST_INT_P (y
))
14165 /* A construct for a conditional compare, if the false arm contains
14166 0, then both conditions must be true, otherwise either condition
14167 must be true. Not all conditions are possible, so CCmode is
14168 returned if it can't be done. */
14169 if (GET_CODE (x
) == IF_THEN_ELSE
14170 && (XEXP (x
, 2) == const0_rtx
14171 || XEXP (x
, 2) == const1_rtx
)
14172 && COMPARISON_P (XEXP (x
, 0))
14173 && COMPARISON_P (XEXP (x
, 1)))
14174 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14175 INTVAL (XEXP (x
, 2)));
14177 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14178 if (GET_CODE (x
) == AND
14179 && (op
== EQ
|| op
== NE
)
14180 && COMPARISON_P (XEXP (x
, 0))
14181 && COMPARISON_P (XEXP (x
, 1)))
14182 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14185 if (GET_CODE (x
) == IOR
14186 && (op
== EQ
|| op
== NE
)
14187 && COMPARISON_P (XEXP (x
, 0))
14188 && COMPARISON_P (XEXP (x
, 1)))
14189 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14192 /* An operation (on Thumb) where we want to test for a single bit.
14193 This is done by shifting that bit up into the top bit of a
14194 scratch register; we can then branch on the sign bit. */
14196 && GET_MODE (x
) == SImode
14197 && (op
== EQ
|| op
== NE
)
14198 && GET_CODE (x
) == ZERO_EXTRACT
14199 && XEXP (x
, 1) == const1_rtx
)
14202 /* An operation that sets the condition codes as a side-effect, the
14203 V flag is not set correctly, so we can only use comparisons where
14204 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14206 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14207 if (GET_MODE (x
) == SImode
14209 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14210 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14211 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14212 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14213 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14214 || GET_CODE (x
) == LSHIFTRT
14215 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14216 || GET_CODE (x
) == ROTATERT
14217 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14218 return CC_NOOVmode
;
14220 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14223 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14224 && GET_CODE (x
) == PLUS
14225 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14228 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14234 /* A DImode comparison against zero can be implemented by
14235 or'ing the two halves together. */
14236 if (y
== const0_rtx
)
14239 /* We can do an equality test in three Thumb instructions. */
14249 /* DImode unsigned comparisons can be implemented by cmp +
14250 cmpeq without a scratch register. Not worth doing in
14261 /* DImode signed and unsigned comparisons can be implemented
14262 by cmp + sbcs with a scratch register, but that does not
14263 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14264 gcc_assert (op
!= EQ
&& op
!= NE
);
14268 gcc_unreachable ();
14272 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14273 return GET_MODE (x
);
14278 /* X and Y are two things to compare using CODE. Emit the compare insn and
14279 return the rtx for register 0 in the proper mode. FP means this is a
14280 floating point compare: I don't think that it is needed on the arm. */
14282 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14286 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14288 /* We might have X as a constant, Y as a register because of the predicates
14289 used for cmpdi. If so, force X to a register here. */
14290 if (dimode_comparison
&& !REG_P (x
))
14291 x
= force_reg (DImode
, x
);
14293 mode
= SELECT_CC_MODE (code
, x
, y
);
14294 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14296 if (dimode_comparison
14297 && mode
!= CC_CZmode
)
14301 /* To compare two non-zero values for equality, XOR them and
14302 then compare against zero. Not used for ARM mode; there
14303 CC_CZmode is cheaper. */
14304 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14306 gcc_assert (!reload_completed
);
14307 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14311 /* A scratch register is required. */
14312 if (reload_completed
)
14313 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14315 scratch
= gen_rtx_SCRATCH (SImode
);
14317 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14318 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14319 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
14322 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14327 /* Generate a sequence of insns that will generate the correct return
14328 address mask depending on the physical architecture that the program
14331 arm_gen_return_addr_mask (void)
14333 rtx reg
= gen_reg_rtx (Pmode
);
14335 emit_insn (gen_return_addr_mask (reg
));
14340 arm_reload_in_hi (rtx
*operands
)
14342 rtx ref
= operands
[1];
14344 HOST_WIDE_INT offset
= 0;
14346 if (GET_CODE (ref
) == SUBREG
)
14348 offset
= SUBREG_BYTE (ref
);
14349 ref
= SUBREG_REG (ref
);
14354 /* We have a pseudo which has been spilt onto the stack; there
14355 are two cases here: the first where there is a simple
14356 stack-slot replacement and a second where the stack-slot is
14357 out of range, or is used as a subreg. */
14358 if (reg_equiv_mem (REGNO (ref
)))
14360 ref
= reg_equiv_mem (REGNO (ref
));
14361 base
= find_replacement (&XEXP (ref
, 0));
14364 /* The slot is out of range, or was dressed up in a SUBREG. */
14365 base
= reg_equiv_address (REGNO (ref
));
14367 /* PR 62554: If there is no equivalent memory location then just move
14368 the value as an SImode register move. This happens when the target
14369 architecture variant does not have an HImode register move. */
14372 gcc_assert (REG_P (operands
[0]));
14373 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14374 gen_rtx_SUBREG (SImode
, ref
, 0)));
14379 base
= find_replacement (&XEXP (ref
, 0));
14381 /* Handle the case where the address is too complex to be offset by 1. */
14382 if (GET_CODE (base
) == MINUS
14383 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14385 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14387 emit_set_insn (base_plus
, base
);
14390 else if (GET_CODE (base
) == PLUS
)
14392 /* The addend must be CONST_INT, or we would have dealt with it above. */
14393 HOST_WIDE_INT hi
, lo
;
14395 offset
+= INTVAL (XEXP (base
, 1));
14396 base
= XEXP (base
, 0);
14398 /* Rework the address into a legal sequence of insns. */
14399 /* Valid range for lo is -4095 -> 4095 */
14402 : -((-offset
) & 0xfff));
14404 /* Corner case, if lo is the max offset then we would be out of range
14405 once we have added the additional 1 below, so bump the msb into the
14406 pre-loading insn(s). */
14410 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14411 ^ (HOST_WIDE_INT
) 0x80000000)
14412 - (HOST_WIDE_INT
) 0x80000000);
14414 gcc_assert (hi
+ lo
== offset
);
14418 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14420 /* Get the base address; addsi3 knows how to handle constants
14421 that require more than one insn. */
14422 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14428 /* Operands[2] may overlap operands[0] (though it won't overlap
14429 operands[1]), that's why we asked for a DImode reg -- so we can
14430 use the bit that does not overlap. */
14431 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14432 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14434 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14436 emit_insn (gen_zero_extendqisi2 (scratch
,
14437 gen_rtx_MEM (QImode
,
14438 plus_constant (Pmode
, base
,
14440 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14441 gen_rtx_MEM (QImode
,
14442 plus_constant (Pmode
, base
,
14444 if (!BYTES_BIG_ENDIAN
)
14445 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14446 gen_rtx_IOR (SImode
,
14449 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14453 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14454 gen_rtx_IOR (SImode
,
14455 gen_rtx_ASHIFT (SImode
, scratch
,
14457 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14460 /* Handle storing a half-word to memory during reload by synthesizing as two
14461 byte stores. Take care not to clobber the input values until after we
14462 have moved them somewhere safe. This code assumes that if the DImode
14463 scratch in operands[2] overlaps either the input value or output address
14464 in some way, then that value must die in this insn (we absolutely need
14465 two scratch registers for some corner cases). */
14467 arm_reload_out_hi (rtx
*operands
)
14469 rtx ref
= operands
[0];
14470 rtx outval
= operands
[1];
14472 HOST_WIDE_INT offset
= 0;
14474 if (GET_CODE (ref
) == SUBREG
)
14476 offset
= SUBREG_BYTE (ref
);
14477 ref
= SUBREG_REG (ref
);
14482 /* We have a pseudo which has been spilt onto the stack; there
14483 are two cases here: the first where there is a simple
14484 stack-slot replacement and a second where the stack-slot is
14485 out of range, or is used as a subreg. */
14486 if (reg_equiv_mem (REGNO (ref
)))
14488 ref
= reg_equiv_mem (REGNO (ref
));
14489 base
= find_replacement (&XEXP (ref
, 0));
14492 /* The slot is out of range, or was dressed up in a SUBREG. */
14493 base
= reg_equiv_address (REGNO (ref
));
14495 /* PR 62254: If there is no equivalent memory location then just move
14496 the value as an SImode register move. This happens when the target
14497 architecture variant does not have an HImode register move. */
14500 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
14502 if (REG_P (outval
))
14504 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14505 gen_rtx_SUBREG (SImode
, outval
, 0)));
14507 else /* SUBREG_P (outval) */
14509 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
14510 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14511 SUBREG_REG (outval
)));
14513 /* FIXME: Handle other cases ? */
14514 gcc_unreachable ();
14520 base
= find_replacement (&XEXP (ref
, 0));
14522 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14524 /* Handle the case where the address is too complex to be offset by 1. */
14525 if (GET_CODE (base
) == MINUS
14526 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14528 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14530 /* Be careful not to destroy OUTVAL. */
14531 if (reg_overlap_mentioned_p (base_plus
, outval
))
14533 /* Updating base_plus might destroy outval, see if we can
14534 swap the scratch and base_plus. */
14535 if (!reg_overlap_mentioned_p (scratch
, outval
))
14536 std::swap (scratch
, base_plus
);
14539 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14541 /* Be conservative and copy OUTVAL into the scratch now,
14542 this should only be necessary if outval is a subreg
14543 of something larger than a word. */
14544 /* XXX Might this clobber base? I can't see how it can,
14545 since scratch is known to overlap with OUTVAL, and
14546 must be wider than a word. */
14547 emit_insn (gen_movhi (scratch_hi
, outval
));
14548 outval
= scratch_hi
;
14552 emit_set_insn (base_plus
, base
);
14555 else if (GET_CODE (base
) == PLUS
)
14557 /* The addend must be CONST_INT, or we would have dealt with it above. */
14558 HOST_WIDE_INT hi
, lo
;
14560 offset
+= INTVAL (XEXP (base
, 1));
14561 base
= XEXP (base
, 0);
14563 /* Rework the address into a legal sequence of insns. */
14564 /* Valid range for lo is -4095 -> 4095 */
14567 : -((-offset
) & 0xfff));
14569 /* Corner case, if lo is the max offset then we would be out of range
14570 once we have added the additional 1 below, so bump the msb into the
14571 pre-loading insn(s). */
14575 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14576 ^ (HOST_WIDE_INT
) 0x80000000)
14577 - (HOST_WIDE_INT
) 0x80000000);
14579 gcc_assert (hi
+ lo
== offset
);
14583 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14585 /* Be careful not to destroy OUTVAL. */
14586 if (reg_overlap_mentioned_p (base_plus
, outval
))
14588 /* Updating base_plus might destroy outval, see if we
14589 can swap the scratch and base_plus. */
14590 if (!reg_overlap_mentioned_p (scratch
, outval
))
14591 std::swap (scratch
, base_plus
);
14594 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
14596 /* Be conservative and copy outval into scratch now,
14597 this should only be necessary if outval is a
14598 subreg of something larger than a word. */
14599 /* XXX Might this clobber base? I can't see how it
14600 can, since scratch is known to overlap with
14602 emit_insn (gen_movhi (scratch_hi
, outval
));
14603 outval
= scratch_hi
;
14607 /* Get the base address; addsi3 knows how to handle constants
14608 that require more than one insn. */
14609 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14615 if (BYTES_BIG_ENDIAN
)
14617 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
14618 plus_constant (Pmode
, base
,
14620 gen_lowpart (QImode
, outval
)));
14621 emit_insn (gen_lshrsi3 (scratch
,
14622 gen_rtx_SUBREG (SImode
, outval
, 0),
14624 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
14626 gen_lowpart (QImode
, scratch
)));
14630 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
14632 gen_lowpart (QImode
, outval
)));
14633 emit_insn (gen_lshrsi3 (scratch
,
14634 gen_rtx_SUBREG (SImode
, outval
, 0),
14636 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
14637 plus_constant (Pmode
, base
,
14639 gen_lowpart (QImode
, scratch
)));
14643 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
14644 (padded to the size of a word) should be passed in a register. */
14647 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
14649 if (TARGET_AAPCS_BASED
)
14650 return must_pass_in_stack_var_size (mode
, type
);
14652 return must_pass_in_stack_var_size_or_pad (mode
, type
);
14656 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
14657 Return true if an argument passed on the stack should be padded upwards,
14658 i.e. if the least-significant byte has useful data.
14659 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
14660 aggregate types are placed in the lowest memory address. */
14663 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
14665 if (!TARGET_AAPCS_BASED
)
14666 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
14668 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
14675 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
14676 Return !BYTES_BIG_ENDIAN if the least significant byte of the
14677 register has useful data, and return the opposite if the most
14678 significant byte does. */
14681 arm_pad_reg_upward (machine_mode mode
,
14682 tree type
, int first ATTRIBUTE_UNUSED
)
14684 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
14686 /* For AAPCS, small aggregates, small fixed-point types,
14687 and small complex types are always padded upwards. */
14690 if ((AGGREGATE_TYPE_P (type
)
14691 || TREE_CODE (type
) == COMPLEX_TYPE
14692 || FIXED_POINT_TYPE_P (type
))
14693 && int_size_in_bytes (type
) <= 4)
14698 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
14699 && GET_MODE_SIZE (mode
) <= 4)
14704 /* Otherwise, use default padding. */
14705 return !BYTES_BIG_ENDIAN
;
14708 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
14709 assuming that the address in the base register is word aligned. */
14711 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
14713 HOST_WIDE_INT max_offset
;
14715 /* Offset must be a multiple of 4 in Thumb mode. */
14716 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
14721 else if (TARGET_ARM
)
14726 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
14729 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
14730 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
14731 Assumes that the address in the base register RN is word aligned. Pattern
14732 guarantees that both memory accesses use the same base register,
14733 the offsets are constants within the range, and the gap between the offsets is 4.
14734 If preload complete then check that registers are legal. WBACK indicates whether
14735 address is updated. LOAD indicates whether memory access is load or store. */
14737 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
14738 bool wback
, bool load
)
14740 unsigned int t
, t2
, n
;
14742 if (!reload_completed
)
14745 if (!offset_ok_for_ldrd_strd (offset
))
14752 if ((TARGET_THUMB2
)
14753 && ((wback
&& (n
== t
|| n
== t2
))
14754 || (t
== SP_REGNUM
)
14755 || (t
== PC_REGNUM
)
14756 || (t2
== SP_REGNUM
)
14757 || (t2
== PC_REGNUM
)
14758 || (!load
&& (n
== PC_REGNUM
))
14759 || (load
&& (t
== t2
))
14760 /* Triggers Cortex-M3 LDRD errata. */
14761 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
14765 && ((wback
&& (n
== t
|| n
== t2
))
14766 || (t2
== PC_REGNUM
)
14767 || (t
% 2 != 0) /* First destination register is not even. */
14769 /* PC can be used as base register (for offset addressing only),
14770 but it is depricated. */
14771 || (n
== PC_REGNUM
)))
14777 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
14778 operand MEM's address contains an immediate offset from the base
14779 register and has no side effects, in which case it sets BASE and
14780 OFFSET accordingly. */
14782 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
14786 gcc_assert (base
!= NULL
&& offset
!= NULL
);
14788 /* TODO: Handle more general memory operand patterns, such as
14789 PRE_DEC and PRE_INC. */
14791 if (side_effects_p (mem
))
14794 /* Can't deal with subregs. */
14795 if (GET_CODE (mem
) == SUBREG
)
14798 gcc_assert (MEM_P (mem
));
14800 *offset
= const0_rtx
;
14802 addr
= XEXP (mem
, 0);
14804 /* If addr isn't valid for DImode, then we can't handle it. */
14805 if (!arm_legitimate_address_p (DImode
, addr
,
14806 reload_in_progress
|| reload_completed
))
14814 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
14816 *base
= XEXP (addr
, 0);
14817 *offset
= XEXP (addr
, 1);
14818 return (REG_P (*base
) && CONST_INT_P (*offset
));
14824 /* Called from a peephole2 to replace two word-size accesses with a
14825 single LDRD/STRD instruction. Returns true iff we can generate a
14826 new instruction sequence. That is, both accesses use the same base
14827 register and the gap between constant offsets is 4. This function
14828 may reorder its operands to match ldrd/strd RTL templates.
14829 OPERANDS are the operands found by the peephole matcher;
14830 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
14831 corresponding memory operands. LOAD indicaates whether the access
14832 is load or store. CONST_STORE indicates a store of constant
14833 integer values held in OPERANDS[4,5] and assumes that the pattern
14834 is of length 4 insn, for the purpose of checking dead registers.
14835 COMMUTE indicates that register operands may be reordered. */
14837 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
14838 bool const_store
, bool commute
)
14841 HOST_WIDE_INT offsets
[2], offset
;
14842 rtx base
= NULL_RTX
;
14843 rtx cur_base
, cur_offset
, tmp
;
14845 HARD_REG_SET regset
;
14847 gcc_assert (!const_store
|| !load
);
14848 /* Check that the memory references are immediate offsets from the
14849 same base register. Extract the base register, the destination
14850 registers, and the corresponding memory offsets. */
14851 for (i
= 0; i
< nops
; i
++)
14853 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
14858 else if (REGNO (base
) != REGNO (cur_base
))
14861 offsets
[i
] = INTVAL (cur_offset
);
14862 if (GET_CODE (operands
[i
]) == SUBREG
)
14864 tmp
= SUBREG_REG (operands
[i
]);
14865 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
14870 /* Make sure there is no dependency between the individual loads. */
14871 if (load
&& REGNO (operands
[0]) == REGNO (base
))
14872 return false; /* RAW */
14874 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
14875 return false; /* WAW */
14877 /* If the same input register is used in both stores
14878 when storing different constants, try to find a free register.
14879 For example, the code
14884 can be transformed into
14888 in Thumb mode assuming that r1 is free.
14889 For ARM mode do the same but only if the starting register
14890 can be made to be even. */
14892 && REGNO (operands
[0]) == REGNO (operands
[1])
14893 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
14897 CLEAR_HARD_REG_SET (regset
);
14898 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
14899 if (tmp
== NULL_RTX
)
14902 /* Use the new register in the first load to ensure that
14903 if the original input register is not dead after peephole,
14904 then it will have the correct constant value. */
14907 else if (TARGET_ARM
)
14909 int regno
= REGNO (operands
[0]);
14910 if (!peep2_reg_dead_p (4, operands
[0]))
14912 /* When the input register is even and is not dead after the
14913 pattern, it has to hold the second constant but we cannot
14914 form a legal STRD in ARM mode with this register as the second
14916 if (regno
% 2 == 0)
14919 /* Is regno-1 free? */
14920 SET_HARD_REG_SET (regset
);
14921 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
14922 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
14923 if (tmp
== NULL_RTX
)
14930 /* Find a DImode register. */
14931 CLEAR_HARD_REG_SET (regset
);
14932 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
14933 if (tmp
!= NULL_RTX
)
14935 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
14936 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
14940 /* Can we use the input register to form a DI register? */
14941 SET_HARD_REG_SET (regset
);
14942 CLEAR_HARD_REG_BIT(regset
,
14943 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
14944 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
14945 if (tmp
== NULL_RTX
)
14947 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
14951 gcc_assert (operands
[0] != NULL_RTX
);
14952 gcc_assert (operands
[1] != NULL_RTX
);
14953 gcc_assert (REGNO (operands
[0]) % 2 == 0);
14954 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
14958 /* Make sure the instructions are ordered with lower memory access first. */
14959 if (offsets
[0] > offsets
[1])
14961 gap
= offsets
[0] - offsets
[1];
14962 offset
= offsets
[1];
14964 /* Swap the instructions such that lower memory is accessed first. */
14965 std::swap (operands
[0], operands
[1]);
14966 std::swap (operands
[2], operands
[3]);
14968 std::swap (operands
[4], operands
[5]);
14972 gap
= offsets
[1] - offsets
[0];
14973 offset
= offsets
[0];
14976 /* Make sure accesses are to consecutive memory locations. */
14980 /* Make sure we generate legal instructions. */
14981 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
14985 /* In Thumb state, where registers are almost unconstrained, there
14986 is little hope to fix it. */
14990 if (load
&& commute
)
14992 /* Try reordering registers. */
14993 std::swap (operands
[0], operands
[1]);
14994 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15001 /* If input registers are dead after this pattern, they can be
15002 reordered or replaced by other registers that are free in the
15003 current pattern. */
15004 if (!peep2_reg_dead_p (4, operands
[0])
15005 || !peep2_reg_dead_p (4, operands
[1]))
15008 /* Try to reorder the input registers. */
15009 /* For example, the code
15014 can be transformed into
15019 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15022 std::swap (operands
[0], operands
[1]);
15026 /* Try to find a free DI register. */
15027 CLEAR_HARD_REG_SET (regset
);
15028 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15029 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15032 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15033 if (tmp
== NULL_RTX
)
15036 /* DREG must be an even-numbered register in DImode.
15037 Split it into SI registers. */
15038 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15039 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15040 gcc_assert (operands
[0] != NULL_RTX
);
15041 gcc_assert (operands
[1] != NULL_RTX
);
15042 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15043 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15045 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15057 /* Print a symbolic form of X to the debug file, F. */
15059 arm_print_value (FILE *f
, rtx x
)
15061 switch (GET_CODE (x
))
15064 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15068 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15076 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15078 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15079 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15087 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15091 fprintf (f
, "`%s'", XSTR (x
, 0));
15095 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15099 arm_print_value (f
, XEXP (x
, 0));
15103 arm_print_value (f
, XEXP (x
, 0));
15105 arm_print_value (f
, XEXP (x
, 1));
15113 fprintf (f
, "????");
15118 /* Routines for manipulation of the constant pool. */
15120 /* Arm instructions cannot load a large constant directly into a
15121 register; they have to come from a pc relative load. The constant
15122 must therefore be placed in the addressable range of the pc
15123 relative load. Depending on the precise pc relative load
15124 instruction the range is somewhere between 256 bytes and 4k. This
15125 means that we often have to dump a constant inside a function, and
15126 generate code to branch around it.
15128 It is important to minimize this, since the branches will slow
15129 things down and make the code larger.
15131 Normally we can hide the table after an existing unconditional
15132 branch so that there is no interruption of the flow, but in the
15133 worst case the code looks like this:
15151 We fix this by performing a scan after scheduling, which notices
15152 which instructions need to have their operands fetched from the
15153 constant table and builds the table.
15155 The algorithm starts by building a table of all the constants that
15156 need fixing up and all the natural barriers in the function (places
15157 where a constant table can be dropped without breaking the flow).
15158 For each fixup we note how far the pc-relative replacement will be
15159 able to reach and the offset of the instruction into the function.
15161 Having built the table we then group the fixes together to form
15162 tables that are as large as possible (subject to addressing
15163 constraints) and emit each table of constants after the last
15164 barrier that is within range of all the instructions in the group.
15165 If a group does not contain a barrier, then we forcibly create one
15166 by inserting a jump instruction into the flow. Once the table has
15167 been inserted, the insns are then modified to reference the
15168 relevant entry in the pool.
15170 Possible enhancements to the algorithm (not implemented) are:
15172 1) For some processors and object formats, there may be benefit in
15173 aligning the pools to the start of cache lines; this alignment
15174 would need to be taken into account when calculating addressability
15177 /* These typedefs are located at the start of this file, so that
15178 they can be used in the prototypes there. This comment is to
15179 remind readers of that fact so that the following structures
15180 can be understood more easily.
15182 typedef struct minipool_node Mnode;
15183 typedef struct minipool_fixup Mfix; */
15185 struct minipool_node
15187 /* Doubly linked chain of entries. */
15190 /* The maximum offset into the code that this entry can be placed. While
15191 pushing fixes for forward references, all entries are sorted in order
15192 of increasing max_address. */
15193 HOST_WIDE_INT max_address
;
15194 /* Similarly for an entry inserted for a backwards ref. */
15195 HOST_WIDE_INT min_address
;
15196 /* The number of fixes referencing this entry. This can become zero
15197 if we "unpush" an entry. In this case we ignore the entry when we
15198 come to emit the code. */
15200 /* The offset from the start of the minipool. */
15201 HOST_WIDE_INT offset
;
15202 /* The value in table. */
15204 /* The mode of value. */
15206 /* The size of the value. With iWMMXt enabled
15207 sizes > 4 also imply an alignment of 8-bytes. */
15211 struct minipool_fixup
15215 HOST_WIDE_INT address
;
15221 HOST_WIDE_INT forwards
;
15222 HOST_WIDE_INT backwards
;
15225 /* Fixes less than a word need padding out to a word boundary. */
15226 #define MINIPOOL_FIX_SIZE(mode) \
15227 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15229 static Mnode
* minipool_vector_head
;
15230 static Mnode
* minipool_vector_tail
;
15231 static rtx_code_label
*minipool_vector_label
;
15232 static int minipool_pad
;
15234 /* The linked list of all minipool fixes required for this function. */
15235 Mfix
* minipool_fix_head
;
15236 Mfix
* minipool_fix_tail
;
15237 /* The fix entry for the current minipool, once it has been placed. */
15238 Mfix
* minipool_barrier
;
15240 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15241 #define JUMP_TABLES_IN_TEXT_SECTION 0
15244 static HOST_WIDE_INT
15245 get_jump_table_size (rtx_jump_table_data
*insn
)
15247 /* ADDR_VECs only take room if read-only data does into the text
15249 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15251 rtx body
= PATTERN (insn
);
15252 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15253 HOST_WIDE_INT size
;
15254 HOST_WIDE_INT modesize
;
15256 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15257 size
= modesize
* XVECLEN (body
, elt
);
15261 /* Round up size of TBB table to a halfword boundary. */
15262 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
15265 /* No padding necessary for TBH. */
15268 /* Add two bytes for alignment on Thumb. */
15273 gcc_unreachable ();
15281 /* Return the maximum amount of padding that will be inserted before
15284 static HOST_WIDE_INT
15285 get_label_padding (rtx label
)
15287 HOST_WIDE_INT align
, min_insn_size
;
15289 align
= 1 << label_to_alignment (label
);
15290 min_insn_size
= TARGET_THUMB
? 2 : 4;
15291 return align
> min_insn_size
? align
- min_insn_size
: 0;
15294 /* Move a minipool fix MP from its current location to before MAX_MP.
15295 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15296 constraints may need updating. */
15298 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15299 HOST_WIDE_INT max_address
)
15301 /* The code below assumes these are different. */
15302 gcc_assert (mp
!= max_mp
);
15304 if (max_mp
== NULL
)
15306 if (max_address
< mp
->max_address
)
15307 mp
->max_address
= max_address
;
15311 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15312 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15314 mp
->max_address
= max_address
;
15316 /* Unlink MP from its current position. Since max_mp is non-null,
15317 mp->prev must be non-null. */
15318 mp
->prev
->next
= mp
->next
;
15319 if (mp
->next
!= NULL
)
15320 mp
->next
->prev
= mp
->prev
;
15322 minipool_vector_tail
= mp
->prev
;
15324 /* Re-insert it before MAX_MP. */
15326 mp
->prev
= max_mp
->prev
;
15329 if (mp
->prev
!= NULL
)
15330 mp
->prev
->next
= mp
;
15332 minipool_vector_head
= mp
;
15335 /* Save the new entry. */
15338 /* Scan over the preceding entries and adjust their addresses as
15340 while (mp
->prev
!= NULL
15341 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15343 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15350 /* Add a constant to the minipool for a forward reference. Returns the
15351 node added or NULL if the constant will not fit in this pool. */
15353 add_minipool_forward_ref (Mfix
*fix
)
15355 /* If set, max_mp is the first pool_entry that has a lower
15356 constraint than the one we are trying to add. */
15357 Mnode
* max_mp
= NULL
;
15358 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15361 /* If the minipool starts before the end of FIX->INSN then this FIX
15362 can not be placed into the current pool. Furthermore, adding the
15363 new constant pool entry may cause the pool to start FIX_SIZE bytes
15365 if (minipool_vector_head
&&
15366 (fix
->address
+ get_attr_length (fix
->insn
)
15367 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15370 /* Scan the pool to see if a constant with the same value has
15371 already been added. While we are doing this, also note the
15372 location where we must insert the constant if it doesn't already
15374 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15376 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15377 && fix
->mode
== mp
->mode
15378 && (!LABEL_P (fix
->value
)
15379 || (CODE_LABEL_NUMBER (fix
->value
)
15380 == CODE_LABEL_NUMBER (mp
->value
)))
15381 && rtx_equal_p (fix
->value
, mp
->value
))
15383 /* More than one fix references this entry. */
15385 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15388 /* Note the insertion point if necessary. */
15390 && mp
->max_address
> max_address
)
15393 /* If we are inserting an 8-bytes aligned quantity and
15394 we have not already found an insertion point, then
15395 make sure that all such 8-byte aligned quantities are
15396 placed at the start of the pool. */
15397 if (ARM_DOUBLEWORD_ALIGN
15399 && fix
->fix_size
>= 8
15400 && mp
->fix_size
< 8)
15403 max_address
= mp
->max_address
;
15407 /* The value is not currently in the minipool, so we need to create
15408 a new entry for it. If MAX_MP is NULL, the entry will be put on
15409 the end of the list since the placement is less constrained than
15410 any existing entry. Otherwise, we insert the new fix before
15411 MAX_MP and, if necessary, adjust the constraints on the other
15414 mp
->fix_size
= fix
->fix_size
;
15415 mp
->mode
= fix
->mode
;
15416 mp
->value
= fix
->value
;
15418 /* Not yet required for a backwards ref. */
15419 mp
->min_address
= -65536;
15421 if (max_mp
== NULL
)
15423 mp
->max_address
= max_address
;
15425 mp
->prev
= minipool_vector_tail
;
15427 if (mp
->prev
== NULL
)
15429 minipool_vector_head
= mp
;
15430 minipool_vector_label
= gen_label_rtx ();
15433 mp
->prev
->next
= mp
;
15435 minipool_vector_tail
= mp
;
15439 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15440 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15442 mp
->max_address
= max_address
;
15445 mp
->prev
= max_mp
->prev
;
15447 if (mp
->prev
!= NULL
)
15448 mp
->prev
->next
= mp
;
15450 minipool_vector_head
= mp
;
15453 /* Save the new entry. */
15456 /* Scan over the preceding entries and adjust their addresses as
15458 while (mp
->prev
!= NULL
15459 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15461 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15469 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15470 HOST_WIDE_INT min_address
)
15472 HOST_WIDE_INT offset
;
15474 /* The code below assumes these are different. */
15475 gcc_assert (mp
!= min_mp
);
15477 if (min_mp
== NULL
)
15479 if (min_address
> mp
->min_address
)
15480 mp
->min_address
= min_address
;
15484 /* We will adjust this below if it is too loose. */
15485 mp
->min_address
= min_address
;
15487 /* Unlink MP from its current position. Since min_mp is non-null,
15488 mp->next must be non-null. */
15489 mp
->next
->prev
= mp
->prev
;
15490 if (mp
->prev
!= NULL
)
15491 mp
->prev
->next
= mp
->next
;
15493 minipool_vector_head
= mp
->next
;
15495 /* Reinsert it after MIN_MP. */
15497 mp
->next
= min_mp
->next
;
15499 if (mp
->next
!= NULL
)
15500 mp
->next
->prev
= mp
;
15502 minipool_vector_tail
= mp
;
15508 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15510 mp
->offset
= offset
;
15511 if (mp
->refcount
> 0)
15512 offset
+= mp
->fix_size
;
15514 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
15515 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
15521 /* Add a constant to the minipool for a backward reference. Returns the
15522 node added or NULL if the constant will not fit in this pool.
15524 Note that the code for insertion for a backwards reference can be
15525 somewhat confusing because the calculated offsets for each fix do
15526 not take into account the size of the pool (which is still under
15529 add_minipool_backward_ref (Mfix
*fix
)
15531 /* If set, min_mp is the last pool_entry that has a lower constraint
15532 than the one we are trying to add. */
15533 Mnode
*min_mp
= NULL
;
15534 /* This can be negative, since it is only a constraint. */
15535 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
15538 /* If we can't reach the current pool from this insn, or if we can't
15539 insert this entry at the end of the pool without pushing other
15540 fixes out of range, then we don't try. This ensures that we
15541 can't fail later on. */
15542 if (min_address
>= minipool_barrier
->address
15543 || (minipool_vector_tail
->min_address
+ fix
->fix_size
15544 >= minipool_barrier
->address
))
15547 /* Scan the pool to see if a constant with the same value has
15548 already been added. While we are doing this, also note the
15549 location where we must insert the constant if it doesn't already
15551 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
15553 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15554 && fix
->mode
== mp
->mode
15555 && (!LABEL_P (fix
->value
)
15556 || (CODE_LABEL_NUMBER (fix
->value
)
15557 == CODE_LABEL_NUMBER (mp
->value
)))
15558 && rtx_equal_p (fix
->value
, mp
->value
)
15559 /* Check that there is enough slack to move this entry to the
15560 end of the table (this is conservative). */
15561 && (mp
->max_address
15562 > (minipool_barrier
->address
15563 + minipool_vector_tail
->offset
15564 + minipool_vector_tail
->fix_size
)))
15567 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
15570 if (min_mp
!= NULL
)
15571 mp
->min_address
+= fix
->fix_size
;
15574 /* Note the insertion point if necessary. */
15575 if (mp
->min_address
< min_address
)
15577 /* For now, we do not allow the insertion of 8-byte alignment
15578 requiring nodes anywhere but at the start of the pool. */
15579 if (ARM_DOUBLEWORD_ALIGN
15580 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
15585 else if (mp
->max_address
15586 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
15588 /* Inserting before this entry would push the fix beyond
15589 its maximum address (which can happen if we have
15590 re-located a forwards fix); force the new fix to come
15592 if (ARM_DOUBLEWORD_ALIGN
15593 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
15598 min_address
= mp
->min_address
+ fix
->fix_size
;
15601 /* Do not insert a non-8-byte aligned quantity before 8-byte
15602 aligned quantities. */
15603 else if (ARM_DOUBLEWORD_ALIGN
15604 && fix
->fix_size
< 8
15605 && mp
->fix_size
>= 8)
15608 min_address
= mp
->min_address
+ fix
->fix_size
;
15613 /* We need to create a new entry. */
15615 mp
->fix_size
= fix
->fix_size
;
15616 mp
->mode
= fix
->mode
;
15617 mp
->value
= fix
->value
;
15619 mp
->max_address
= minipool_barrier
->address
+ 65536;
15621 mp
->min_address
= min_address
;
15623 if (min_mp
== NULL
)
15626 mp
->next
= minipool_vector_head
;
15628 if (mp
->next
== NULL
)
15630 minipool_vector_tail
= mp
;
15631 minipool_vector_label
= gen_label_rtx ();
15634 mp
->next
->prev
= mp
;
15636 minipool_vector_head
= mp
;
15640 mp
->next
= min_mp
->next
;
15644 if (mp
->next
!= NULL
)
15645 mp
->next
->prev
= mp
;
15647 minipool_vector_tail
= mp
;
15650 /* Save the new entry. */
15658 /* Scan over the following entries and adjust their offsets. */
15659 while (mp
->next
!= NULL
)
15661 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
15662 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
15665 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
15667 mp
->next
->offset
= mp
->offset
;
15676 assign_minipool_offsets (Mfix
*barrier
)
15678 HOST_WIDE_INT offset
= 0;
15681 minipool_barrier
= barrier
;
15683 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15685 mp
->offset
= offset
;
15687 if (mp
->refcount
> 0)
15688 offset
+= mp
->fix_size
;
15692 /* Output the literal table */
15694 dump_minipool (rtx_insn
*scan
)
15700 if (ARM_DOUBLEWORD_ALIGN
)
15701 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15702 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
15709 fprintf (dump_file
,
15710 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
15711 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
15713 scan
= emit_label_after (gen_label_rtx (), scan
);
15714 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
15715 scan
= emit_label_after (minipool_vector_label
, scan
);
15717 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
15719 if (mp
->refcount
> 0)
15723 fprintf (dump_file
,
15724 ";; Offset %u, min %ld, max %ld ",
15725 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
15726 (unsigned long) mp
->max_address
);
15727 arm_print_value (dump_file
, mp
->value
);
15728 fputc ('\n', dump_file
);
15731 switch (GET_MODE_SIZE (mp
->mode
))
15733 #ifdef HAVE_consttable_1
15735 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
15739 #ifdef HAVE_consttable_2
15741 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
15745 #ifdef HAVE_consttable_4
15747 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
15751 #ifdef HAVE_consttable_8
15753 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
15757 #ifdef HAVE_consttable_16
15759 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
15764 gcc_unreachable ();
15772 minipool_vector_head
= minipool_vector_tail
= NULL
;
15773 scan
= emit_insn_after (gen_consttable_end (), scan
);
15774 scan
= emit_barrier_after (scan
);
15777 /* Return the cost of forcibly inserting a barrier after INSN. */
15779 arm_barrier_cost (rtx_insn
*insn
)
15781 /* Basing the location of the pool on the loop depth is preferable,
15782 but at the moment, the basic block information seems to be
15783 corrupt by this stage of the compilation. */
15784 int base_cost
= 50;
15785 rtx_insn
*next
= next_nonnote_insn (insn
);
15787 if (next
!= NULL
&& LABEL_P (next
))
15790 switch (GET_CODE (insn
))
15793 /* It will always be better to place the table before the label, rather
15802 return base_cost
- 10;
15805 return base_cost
+ 10;
15809 /* Find the best place in the insn stream in the range
15810 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
15811 Create the barrier by inserting a jump and add a new fix entry for
15814 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
15816 HOST_WIDE_INT count
= 0;
15817 rtx_barrier
*barrier
;
15818 rtx_insn
*from
= fix
->insn
;
15819 /* The instruction after which we will insert the jump. */
15820 rtx_insn
*selected
= NULL
;
15822 /* The address at which the jump instruction will be placed. */
15823 HOST_WIDE_INT selected_address
;
15825 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
15826 rtx_code_label
*label
= gen_label_rtx ();
15828 selected_cost
= arm_barrier_cost (from
);
15829 selected_address
= fix
->address
;
15831 while (from
&& count
< max_count
)
15833 rtx_jump_table_data
*tmp
;
15836 /* This code shouldn't have been called if there was a natural barrier
15838 gcc_assert (!BARRIER_P (from
));
15840 /* Count the length of this insn. This must stay in sync with the
15841 code that pushes minipool fixes. */
15842 if (LABEL_P (from
))
15843 count
+= get_label_padding (from
);
15845 count
+= get_attr_length (from
);
15847 /* If there is a jump table, add its length. */
15848 if (tablejump_p (from
, NULL
, &tmp
))
15850 count
+= get_jump_table_size (tmp
);
15852 /* Jump tables aren't in a basic block, so base the cost on
15853 the dispatch insn. If we select this location, we will
15854 still put the pool after the table. */
15855 new_cost
= arm_barrier_cost (from
);
15857 if (count
< max_count
15858 && (!selected
|| new_cost
<= selected_cost
))
15861 selected_cost
= new_cost
;
15862 selected_address
= fix
->address
+ count
;
15865 /* Continue after the dispatch table. */
15866 from
= NEXT_INSN (tmp
);
15870 new_cost
= arm_barrier_cost (from
);
15872 if (count
< max_count
15873 && (!selected
|| new_cost
<= selected_cost
))
15876 selected_cost
= new_cost
;
15877 selected_address
= fix
->address
+ count
;
15880 from
= NEXT_INSN (from
);
15883 /* Make sure that we found a place to insert the jump. */
15884 gcc_assert (selected
);
15886 /* Make sure we do not split a call and its corresponding
15887 CALL_ARG_LOCATION note. */
15888 if (CALL_P (selected
))
15890 rtx_insn
*next
= NEXT_INSN (selected
);
15891 if (next
&& NOTE_P (next
)
15892 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
15896 /* Create a new JUMP_INSN that branches around a barrier. */
15897 from
= emit_jump_insn_after (gen_jump (label
), selected
);
15898 JUMP_LABEL (from
) = label
;
15899 barrier
= emit_barrier_after (from
);
15900 emit_label_after (label
, barrier
);
15902 /* Create a minipool barrier entry for the new barrier. */
15903 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
15904 new_fix
->insn
= barrier
;
15905 new_fix
->address
= selected_address
;
15906 new_fix
->next
= fix
->next
;
15907 fix
->next
= new_fix
;
15912 /* Record that there is a natural barrier in the insn stream at
15915 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
15917 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
15920 fix
->address
= address
;
15923 if (minipool_fix_head
!= NULL
)
15924 minipool_fix_tail
->next
= fix
;
15926 minipool_fix_head
= fix
;
15928 minipool_fix_tail
= fix
;
15931 /* Record INSN, which will need fixing up to load a value from the
15932 minipool. ADDRESS is the offset of the insn since the start of the
15933 function; LOC is a pointer to the part of the insn which requires
15934 fixing; VALUE is the constant that must be loaded, which is of type
15937 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
15938 machine_mode mode
, rtx value
)
15940 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
15943 fix
->address
= address
;
15946 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
15947 fix
->value
= value
;
15948 fix
->forwards
= get_attr_pool_range (insn
);
15949 fix
->backwards
= get_attr_neg_pool_range (insn
);
15950 fix
->minipool
= NULL
;
15952 /* If an insn doesn't have a range defined for it, then it isn't
15953 expecting to be reworked by this code. Better to stop now than
15954 to generate duff assembly code. */
15955 gcc_assert (fix
->forwards
|| fix
->backwards
);
15957 /* If an entry requires 8-byte alignment then assume all constant pools
15958 require 4 bytes of padding. Trying to do this later on a per-pool
15959 basis is awkward because existing pool entries have to be modified. */
15960 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
15965 fprintf (dump_file
,
15966 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
15967 GET_MODE_NAME (mode
),
15968 INSN_UID (insn
), (unsigned long) address
,
15969 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
15970 arm_print_value (dump_file
, fix
->value
);
15971 fprintf (dump_file
, "\n");
15974 /* Add it to the chain of fixes. */
15977 if (minipool_fix_head
!= NULL
)
15978 minipool_fix_tail
->next
= fix
;
15980 minipool_fix_head
= fix
;
15982 minipool_fix_tail
= fix
;
15985 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
15986 Returns the number of insns needed, or 99 if we always want to synthesize
15989 arm_max_const_double_inline_cost ()
15991 /* Let the value get synthesized to avoid the use of literal pools. */
15992 if (arm_disable_literal_pool
)
15995 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
15998 /* Return the cost of synthesizing a 64-bit constant VAL inline.
15999 Returns the number of insns needed, or 99 if we don't know how to
16002 arm_const_double_inline_cost (rtx val
)
16004 rtx lowpart
, highpart
;
16007 mode
= GET_MODE (val
);
16009 if (mode
== VOIDmode
)
16012 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16014 lowpart
= gen_lowpart (SImode
, val
);
16015 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16017 gcc_assert (CONST_INT_P (lowpart
));
16018 gcc_assert (CONST_INT_P (highpart
));
16020 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16021 NULL_RTX
, NULL_RTX
, 0, 0)
16022 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16023 NULL_RTX
, NULL_RTX
, 0, 0));
16026 /* Cost of loading a SImode constant. */
16028 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16030 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16031 NULL_RTX
, NULL_RTX
, 1, 0);
16034 /* Return true if it is worthwhile to split a 64-bit constant into two
16035 32-bit operations. This is the case if optimizing for size, or
16036 if we have load delay slots, or if one 32-bit part can be done with
16037 a single data operation. */
16039 arm_const_double_by_parts (rtx val
)
16041 machine_mode mode
= GET_MODE (val
);
16044 if (optimize_size
|| arm_ld_sched
)
16047 if (mode
== VOIDmode
)
16050 part
= gen_highpart_mode (SImode
, mode
, val
);
16052 gcc_assert (CONST_INT_P (part
));
16054 if (const_ok_for_arm (INTVAL (part
))
16055 || const_ok_for_arm (~INTVAL (part
)))
16058 part
= gen_lowpart (SImode
, val
);
16060 gcc_assert (CONST_INT_P (part
));
16062 if (const_ok_for_arm (INTVAL (part
))
16063 || const_ok_for_arm (~INTVAL (part
)))
16069 /* Return true if it is possible to inline both the high and low parts
16070 of a 64-bit constant into 32-bit data processing instructions. */
16072 arm_const_double_by_immediates (rtx val
)
16074 machine_mode mode
= GET_MODE (val
);
16077 if (mode
== VOIDmode
)
16080 part
= gen_highpart_mode (SImode
, mode
, val
);
16082 gcc_assert (CONST_INT_P (part
));
16084 if (!const_ok_for_arm (INTVAL (part
)))
16087 part
= gen_lowpart (SImode
, val
);
16089 gcc_assert (CONST_INT_P (part
));
16091 if (!const_ok_for_arm (INTVAL (part
)))
16097 /* Scan INSN and note any of its operands that need fixing.
16098 If DO_PUSHES is false we do not actually push any of the fixups
16101 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16105 extract_constrain_insn (insn
);
16107 if (recog_data
.n_alternatives
== 0)
16110 /* Fill in recog_op_alt with information about the constraints of
16112 preprocess_constraints (insn
);
16114 const operand_alternative
*op_alt
= which_op_alt ();
16115 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16117 /* Things we need to fix can only occur in inputs. */
16118 if (recog_data
.operand_type
[opno
] != OP_IN
)
16121 /* If this alternative is a memory reference, then any mention
16122 of constants in this alternative is really to fool reload
16123 into allowing us to accept one there. We need to fix them up
16124 now so that we output the right code. */
16125 if (op_alt
[opno
].memory_ok
)
16127 rtx op
= recog_data
.operand
[opno
];
16129 if (CONSTANT_P (op
))
16132 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16133 recog_data
.operand_mode
[opno
], op
);
16135 else if (MEM_P (op
)
16136 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16137 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16141 rtx cop
= avoid_constant_pool_reference (op
);
16143 /* Casting the address of something to a mode narrower
16144 than a word can cause avoid_constant_pool_reference()
16145 to return the pool reference itself. That's no good to
16146 us here. Lets just hope that we can use the
16147 constant pool value directly. */
16149 cop
= get_pool_constant (XEXP (op
, 0));
16151 push_minipool_fix (insn
, address
,
16152 recog_data
.operand_loc
[opno
],
16153 recog_data
.operand_mode
[opno
], cop
);
16163 /* Rewrite move insn into subtract of 0 if the condition codes will
16164 be useful in next conditional jump insn. */
16167 thumb1_reorg (void)
16171 FOR_EACH_BB_FN (bb
, cfun
)
16174 rtx cmp
, op0
, op1
, set
= NULL
;
16175 rtx_insn
*prev
, *insn
= BB_END (bb
);
16176 bool insn_clobbered
= false;
16178 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
16179 insn
= PREV_INSN (insn
);
16181 /* Find the last cbranchsi4_insn in basic block BB. */
16182 if (insn
== BB_HEAD (bb
)
16183 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
16186 /* Get the register with which we are comparing. */
16187 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
16188 op0
= XEXP (cmp
, 0);
16189 op1
= XEXP (cmp
, 1);
16191 /* Check that comparison is against ZERO. */
16192 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
16195 /* Find the first flag setting insn before INSN in basic block BB. */
16196 gcc_assert (insn
!= BB_HEAD (bb
));
16197 for (prev
= PREV_INSN (insn
);
16199 && prev
!= BB_HEAD (bb
)
16201 || DEBUG_INSN_P (prev
)
16202 || ((set
= single_set (prev
)) != NULL
16203 && get_attr_conds (prev
) == CONDS_NOCOND
)));
16204 prev
= PREV_INSN (prev
))
16206 if (reg_set_p (op0
, prev
))
16207 insn_clobbered
= true;
16210 /* Skip if op0 is clobbered by insn other than prev. */
16211 if (insn_clobbered
)
16217 dest
= SET_DEST (set
);
16218 src
= SET_SRC (set
);
16219 if (!low_register_operand (dest
, SImode
)
16220 || !low_register_operand (src
, SImode
))
16223 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16224 in INSN. Both src and dest of the move insn are checked. */
16225 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
16227 dest
= copy_rtx (dest
);
16228 src
= copy_rtx (src
);
16229 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
16230 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
16231 INSN_CODE (prev
) = -1;
16232 /* Set test register in INSN to dest. */
16233 XEXP (cmp
, 0) = copy_rtx (dest
);
16234 INSN_CODE (insn
) = -1;
16239 /* Convert instructions to their cc-clobbering variant if possible, since
16240 that allows us to use smaller encodings. */
16243 thumb2_reorg (void)
16248 INIT_REG_SET (&live
);
16250 /* We are freeing block_for_insn in the toplev to keep compatibility
16251 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16252 compute_bb_for_insn ();
16255 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
16257 FOR_EACH_BB_FN (bb
, cfun
)
16259 if ((current_tune
->disparage_flag_setting_t16_encodings
16260 == tune_params::DISPARAGE_FLAGS_ALL
)
16261 && optimize_bb_for_speed_p (bb
))
16265 Convert_Action action
= SKIP
;
16266 Convert_Action action_for_partial_flag_setting
16267 = ((current_tune
->disparage_flag_setting_t16_encodings
16268 != tune_params::DISPARAGE_FLAGS_NEITHER
)
16269 && optimize_bb_for_speed_p (bb
))
16272 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
16273 df_simulate_initialize_backwards (bb
, &live
);
16274 FOR_BB_INSNS_REVERSE (bb
, insn
)
16276 if (NONJUMP_INSN_P (insn
)
16277 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
16278 && GET_CODE (PATTERN (insn
)) == SET
)
16281 rtx pat
= PATTERN (insn
);
16282 rtx dst
= XEXP (pat
, 0);
16283 rtx src
= XEXP (pat
, 1);
16284 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
16286 if (UNARY_P (src
) || BINARY_P (src
))
16287 op0
= XEXP (src
, 0);
16289 if (BINARY_P (src
))
16290 op1
= XEXP (src
, 1);
16292 if (low_register_operand (dst
, SImode
))
16294 switch (GET_CODE (src
))
16297 /* Adding two registers and storing the result
16298 in the first source is already a 16-bit
16300 if (rtx_equal_p (dst
, op0
)
16301 && register_operand (op1
, SImode
))
16304 if (low_register_operand (op0
, SImode
))
16306 /* ADDS <Rd>,<Rn>,<Rm> */
16307 if (low_register_operand (op1
, SImode
))
16309 /* ADDS <Rdn>,#<imm8> */
16310 /* SUBS <Rdn>,#<imm8> */
16311 else if (rtx_equal_p (dst
, op0
)
16312 && CONST_INT_P (op1
)
16313 && IN_RANGE (INTVAL (op1
), -255, 255))
16315 /* ADDS <Rd>,<Rn>,#<imm3> */
16316 /* SUBS <Rd>,<Rn>,#<imm3> */
16317 else if (CONST_INT_P (op1
)
16318 && IN_RANGE (INTVAL (op1
), -7, 7))
16321 /* ADCS <Rd>, <Rn> */
16322 else if (GET_CODE (XEXP (src
, 0)) == PLUS
16323 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
16324 && low_register_operand (XEXP (XEXP (src
, 0), 1),
16326 && COMPARISON_P (op1
)
16327 && cc_register (XEXP (op1
, 0), VOIDmode
)
16328 && maybe_get_arm_condition_code (op1
) == ARM_CS
16329 && XEXP (op1
, 1) == const0_rtx
)
16334 /* RSBS <Rd>,<Rn>,#0
16335 Not handled here: see NEG below. */
16336 /* SUBS <Rd>,<Rn>,#<imm3>
16338 Not handled here: see PLUS above. */
16339 /* SUBS <Rd>,<Rn>,<Rm> */
16340 if (low_register_operand (op0
, SImode
)
16341 && low_register_operand (op1
, SImode
))
16346 /* MULS <Rdm>,<Rn>,<Rdm>
16347 As an exception to the rule, this is only used
16348 when optimizing for size since MULS is slow on all
16349 known implementations. We do not even want to use
16350 MULS in cold code, if optimizing for speed, so we
16351 test the global flag here. */
16352 if (!optimize_size
)
16354 /* Fall through. */
16358 /* ANDS <Rdn>,<Rm> */
16359 if (rtx_equal_p (dst
, op0
)
16360 && low_register_operand (op1
, SImode
))
16361 action
= action_for_partial_flag_setting
;
16362 else if (rtx_equal_p (dst
, op1
)
16363 && low_register_operand (op0
, SImode
))
16364 action
= action_for_partial_flag_setting
== SKIP
16365 ? SKIP
: SWAP_CONV
;
16371 /* ASRS <Rdn>,<Rm> */
16372 /* LSRS <Rdn>,<Rm> */
16373 /* LSLS <Rdn>,<Rm> */
16374 if (rtx_equal_p (dst
, op0
)
16375 && low_register_operand (op1
, SImode
))
16376 action
= action_for_partial_flag_setting
;
16377 /* ASRS <Rd>,<Rm>,#<imm5> */
16378 /* LSRS <Rd>,<Rm>,#<imm5> */
16379 /* LSLS <Rd>,<Rm>,#<imm5> */
16380 else if (low_register_operand (op0
, SImode
)
16381 && CONST_INT_P (op1
)
16382 && IN_RANGE (INTVAL (op1
), 0, 31))
16383 action
= action_for_partial_flag_setting
;
16387 /* RORS <Rdn>,<Rm> */
16388 if (rtx_equal_p (dst
, op0
)
16389 && low_register_operand (op1
, SImode
))
16390 action
= action_for_partial_flag_setting
;
16394 /* MVNS <Rd>,<Rm> */
16395 if (low_register_operand (op0
, SImode
))
16396 action
= action_for_partial_flag_setting
;
16400 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16401 if (low_register_operand (op0
, SImode
))
16406 /* MOVS <Rd>,#<imm8> */
16407 if (CONST_INT_P (src
)
16408 && IN_RANGE (INTVAL (src
), 0, 255))
16409 action
= action_for_partial_flag_setting
;
16413 /* MOVS and MOV<c> with registers have different
16414 encodings, so are not relevant here. */
16422 if (action
!= SKIP
)
16424 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
16425 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
16428 if (action
== SWAP_CONV
)
16430 src
= copy_rtx (src
);
16431 XEXP (src
, 0) = op1
;
16432 XEXP (src
, 1) = op0
;
16433 pat
= gen_rtx_SET (dst
, src
);
16434 vec
= gen_rtvec (2, pat
, clobber
);
16436 else /* action == CONV */
16437 vec
= gen_rtvec (2, pat
, clobber
);
16439 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
16440 INSN_CODE (insn
) = -1;
16444 if (NONDEBUG_INSN_P (insn
))
16445 df_simulate_one_insn_backwards (bb
, insn
, &live
);
16449 CLEAR_REG_SET (&live
);
16452 /* Gcc puts the pool in the wrong place for ARM, since we can only
16453 load addresses a limited distance around the pc. We do some
16454 special munging to move the constant pool values to the correct
16455 point in the code. */
16460 HOST_WIDE_INT address
= 0;
16465 else if (TARGET_THUMB2
)
16468 /* Ensure all insns that must be split have been split at this point.
16469 Otherwise, the pool placement code below may compute incorrect
16470 insn lengths. Note that when optimizing, all insns have already
16471 been split at this point. */
16473 split_all_insns_noflow ();
16475 minipool_fix_head
= minipool_fix_tail
= NULL
;
16477 /* The first insn must always be a note, or the code below won't
16478 scan it properly. */
16479 insn
= get_insns ();
16480 gcc_assert (NOTE_P (insn
));
16483 /* Scan all the insns and record the operands that will need fixing. */
16484 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
16486 if (BARRIER_P (insn
))
16487 push_minipool_barrier (insn
, address
);
16488 else if (INSN_P (insn
))
16490 rtx_jump_table_data
*table
;
16492 note_invalid_constants (insn
, address
, true);
16493 address
+= get_attr_length (insn
);
16495 /* If the insn is a vector jump, add the size of the table
16496 and skip the table. */
16497 if (tablejump_p (insn
, NULL
, &table
))
16499 address
+= get_jump_table_size (table
);
16503 else if (LABEL_P (insn
))
16504 /* Add the worst-case padding due to alignment. We don't add
16505 the _current_ padding because the minipool insertions
16506 themselves might change it. */
16507 address
+= get_label_padding (insn
);
16510 fix
= minipool_fix_head
;
16512 /* Now scan the fixups and perform the required changes. */
16517 Mfix
* last_added_fix
;
16518 Mfix
* last_barrier
= NULL
;
16521 /* Skip any further barriers before the next fix. */
16522 while (fix
&& BARRIER_P (fix
->insn
))
16525 /* No more fixes. */
16529 last_added_fix
= NULL
;
16531 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
16533 if (BARRIER_P (ftmp
->insn
))
16535 if (ftmp
->address
>= minipool_vector_head
->max_address
)
16538 last_barrier
= ftmp
;
16540 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
16543 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
16546 /* If we found a barrier, drop back to that; any fixes that we
16547 could have reached but come after the barrier will now go in
16548 the next mini-pool. */
16549 if (last_barrier
!= NULL
)
16551 /* Reduce the refcount for those fixes that won't go into this
16553 for (fdel
= last_barrier
->next
;
16554 fdel
&& fdel
!= ftmp
;
16557 fdel
->minipool
->refcount
--;
16558 fdel
->minipool
= NULL
;
16561 ftmp
= last_barrier
;
16565 /* ftmp is first fix that we can't fit into this pool and
16566 there no natural barriers that we could use. Insert a
16567 new barrier in the code somewhere between the previous
16568 fix and this one, and arrange to jump around it. */
16569 HOST_WIDE_INT max_address
;
16571 /* The last item on the list of fixes must be a barrier, so
16572 we can never run off the end of the list of fixes without
16573 last_barrier being set. */
16576 max_address
= minipool_vector_head
->max_address
;
16577 /* Check that there isn't another fix that is in range that
16578 we couldn't fit into this pool because the pool was
16579 already too large: we need to put the pool before such an
16580 instruction. The pool itself may come just after the
16581 fix because create_fix_barrier also allows space for a
16582 jump instruction. */
16583 if (ftmp
->address
< max_address
)
16584 max_address
= ftmp
->address
+ 1;
16586 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
16589 assign_minipool_offsets (last_barrier
);
16593 if (!BARRIER_P (ftmp
->insn
)
16594 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
16601 /* Scan over the fixes we have identified for this pool, fixing them
16602 up and adding the constants to the pool itself. */
16603 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
16604 this_fix
= this_fix
->next
)
16605 if (!BARRIER_P (this_fix
->insn
))
16608 = plus_constant (Pmode
,
16609 gen_rtx_LABEL_REF (VOIDmode
,
16610 minipool_vector_label
),
16611 this_fix
->minipool
->offset
);
16612 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
16615 dump_minipool (last_barrier
->insn
);
16619 /* From now on we must synthesize any constants that we can't handle
16620 directly. This can happen if the RTL gets split during final
16621 instruction generation. */
16622 cfun
->machine
->after_arm_reorg
= 1;
16624 /* Free the minipool memory. */
16625 obstack_free (&minipool_obstack
, minipool_startobj
);
16628 /* Routines to output assembly language. */
16630 /* Return string representation of passed in real value. */
16631 static const char *
16632 fp_const_from_val (REAL_VALUE_TYPE
*r
)
16634 if (!fp_consts_inited
)
16637 gcc_assert (real_equal (r
, &value_fp0
));
16641 /* OPERANDS[0] is the entire list of insns that constitute pop,
16642 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
16643 is in the list, UPDATE is true iff the list contains explicit
16644 update of base register. */
16646 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
16652 const char *conditional
;
16653 int num_saves
= XVECLEN (operands
[0], 0);
16654 unsigned int regno
;
16655 unsigned int regno_base
= REGNO (operands
[1]);
16656 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
16659 offset
+= update
? 1 : 0;
16660 offset
+= return_pc
? 1 : 0;
16662 /* Is the base register in the list? */
16663 for (i
= offset
; i
< num_saves
; i
++)
16665 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
16666 /* If SP is in the list, then the base register must be SP. */
16667 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
16668 /* If base register is in the list, there must be no explicit update. */
16669 if (regno
== regno_base
)
16670 gcc_assert (!update
);
16673 conditional
= reverse
? "%?%D0" : "%?%d0";
16674 /* Can't use POP if returning from an interrupt. */
16675 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
16676 sprintf (pattern
, "pop%s\t{", conditional
);
16679 /* Output ldmfd when the base register is SP, otherwise output ldmia.
16680 It's just a convention, their semantics are identical. */
16681 if (regno_base
== SP_REGNUM
)
16682 sprintf (pattern
, "ldmfd%s\t", conditional
);
16684 sprintf (pattern
, "ldmia%s\t", conditional
);
16686 sprintf (pattern
, "ldm%s\t", conditional
);
16688 strcat (pattern
, reg_names
[regno_base
]);
16690 strcat (pattern
, "!, {");
16692 strcat (pattern
, ", {");
16695 /* Output the first destination register. */
16697 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
16699 /* Output the rest of the destination registers. */
16700 for (i
= offset
+ 1; i
< num_saves
; i
++)
16702 strcat (pattern
, ", ");
16704 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
16707 strcat (pattern
, "}");
16709 if (interrupt_p
&& return_pc
)
16710 strcat (pattern
, "^");
16712 output_asm_insn (pattern
, &cond
);
16716 /* Output the assembly for a store multiple. */
16719 vfp_output_vstmd (rtx
* operands
)
16725 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
16726 ? XEXP (operands
[0], 0)
16727 : XEXP (XEXP (operands
[0], 0), 0);
16728 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
16731 strcpy (pattern
, "vpush%?.64\t{%P1");
16733 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
16735 p
= strlen (pattern
);
16737 gcc_assert (REG_P (operands
[1]));
16739 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
16740 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
16742 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
16744 strcpy (&pattern
[p
], "}");
16746 output_asm_insn (pattern
, operands
);
16751 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
16752 number of bytes pushed. */
16755 vfp_emit_fstmd (int base_reg
, int count
)
16762 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
16763 register pairs are stored by a store multiple insn. We avoid this
16764 by pushing an extra pair. */
16765 if (count
== 2 && !arm_arch6
)
16767 if (base_reg
== LAST_VFP_REGNUM
- 3)
16772 /* FSTMD may not store more than 16 doubleword registers at once. Split
16773 larger stores into multiple parts (up to a maximum of two, in
16778 /* NOTE: base_reg is an internal register number, so each D register
16780 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
16781 saved
+= vfp_emit_fstmd (base_reg
, 16);
16785 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
16786 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
16788 reg
= gen_rtx_REG (DFmode
, base_reg
);
16791 XVECEXP (par
, 0, 0)
16792 = gen_rtx_SET (gen_frame_mem
16794 gen_rtx_PRE_MODIFY (Pmode
,
16797 (Pmode
, stack_pointer_rtx
,
16800 gen_rtx_UNSPEC (BLKmode
,
16801 gen_rtvec (1, reg
),
16802 UNSPEC_PUSH_MULT
));
16804 tmp
= gen_rtx_SET (stack_pointer_rtx
,
16805 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
16806 RTX_FRAME_RELATED_P (tmp
) = 1;
16807 XVECEXP (dwarf
, 0, 0) = tmp
;
16809 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
16810 RTX_FRAME_RELATED_P (tmp
) = 1;
16811 XVECEXP (dwarf
, 0, 1) = tmp
;
16813 for (i
= 1; i
< count
; i
++)
16815 reg
= gen_rtx_REG (DFmode
, base_reg
);
16817 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
16819 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
16820 plus_constant (Pmode
,
16824 RTX_FRAME_RELATED_P (tmp
) = 1;
16825 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
16828 par
= emit_insn (par
);
16829 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
16830 RTX_FRAME_RELATED_P (par
) = 1;
16835 /* Emit a call instruction with pattern PAT. ADDR is the address of
16836 the call target. */
16839 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
16843 insn
= emit_call_insn (pat
);
16845 /* The PIC register is live on entry to VxWorks PIC PLT entries.
16846 If the call might use such an entry, add a use of the PIC register
16847 to the instruction's CALL_INSN_FUNCTION_USAGE. */
16848 if (TARGET_VXWORKS_RTP
16851 && GET_CODE (addr
) == SYMBOL_REF
16852 && (SYMBOL_REF_DECL (addr
)
16853 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
16854 : !SYMBOL_REF_LOCAL_P (addr
)))
16856 require_pic_register ();
16857 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
16860 if (TARGET_AAPCS_BASED
)
16862 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
16863 linker. We need to add an IP clobber to allow setting
16864 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
16865 is not needed since it's a fixed register. */
16866 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
16867 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
16871 /* Output a 'call' insn. */
16873 output_call (rtx
*operands
)
16875 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
16877 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
16878 if (REGNO (operands
[0]) == LR_REGNUM
)
16880 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
16881 output_asm_insn ("mov%?\t%0, %|lr", operands
);
16884 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
16886 if (TARGET_INTERWORK
|| arm_arch4t
)
16887 output_asm_insn ("bx%?\t%0", operands
);
16889 output_asm_insn ("mov%?\t%|pc, %0", operands
);
16894 /* Output a move from arm registers to arm registers of a long double
16895 OPERANDS[0] is the destination.
16896 OPERANDS[1] is the source. */
16898 output_mov_long_double_arm_from_arm (rtx
*operands
)
16900 /* We have to be careful here because the two might overlap. */
16901 int dest_start
= REGNO (operands
[0]);
16902 int src_start
= REGNO (operands
[1]);
16906 if (dest_start
< src_start
)
16908 for (i
= 0; i
< 3; i
++)
16910 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
16911 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
16912 output_asm_insn ("mov%?\t%0, %1", ops
);
16917 for (i
= 2; i
>= 0; i
--)
16919 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
16920 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
16921 output_asm_insn ("mov%?\t%0, %1", ops
);
16929 arm_emit_movpair (rtx dest
, rtx src
)
16933 /* If the src is an immediate, simplify it. */
16934 if (CONST_INT_P (src
))
16936 HOST_WIDE_INT val
= INTVAL (src
);
16937 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
16938 if ((val
>> 16) & 0x0000ffff)
16940 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
16942 GEN_INT ((val
>> 16) & 0x0000ffff));
16943 insn
= get_last_insn ();
16944 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
16948 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
16949 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
16950 insn
= get_last_insn ();
16951 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
16954 /* Output a move between double words. It must be REG<-MEM
16957 output_move_double (rtx
*operands
, bool emit
, int *count
)
16959 enum rtx_code code0
= GET_CODE (operands
[0]);
16960 enum rtx_code code1
= GET_CODE (operands
[1]);
16965 /* The only case when this might happen is when
16966 you are looking at the length of a DImode instruction
16967 that has an invalid constant in it. */
16968 if (code0
== REG
&& code1
!= MEM
)
16970 gcc_assert (!emit
);
16977 unsigned int reg0
= REGNO (operands
[0]);
16979 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
16981 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
16983 switch (GET_CODE (XEXP (operands
[1], 0)))
16990 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
16991 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
16993 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
16998 gcc_assert (TARGET_LDRD
);
17000 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
17007 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
17009 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
17017 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
17019 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
17024 gcc_assert (TARGET_LDRD
);
17026 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
17031 /* Autoincrement addressing modes should never have overlapping
17032 base and destination registers, and overlapping index registers
17033 are already prohibited, so this doesn't need to worry about
17035 otherops
[0] = operands
[0];
17036 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17037 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17039 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17041 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17043 /* Registers overlap so split out the increment. */
17046 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
17047 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
17054 /* Use a single insn if we can.
17055 FIXME: IWMMXT allows offsets larger than ldrd can
17056 handle, fix these up with a pair of ldr. */
17058 || !CONST_INT_P (otherops
[2])
17059 || (INTVAL (otherops
[2]) > -256
17060 && INTVAL (otherops
[2]) < 256))
17063 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
17069 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
17070 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17080 /* Use a single insn if we can.
17081 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17082 fix these up with a pair of ldr. */
17084 || !CONST_INT_P (otherops
[2])
17085 || (INTVAL (otherops
[2]) > -256
17086 && INTVAL (otherops
[2]) < 256))
17089 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
17095 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
17096 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
17106 /* We might be able to use ldrd %0, %1 here. However the range is
17107 different to ldr/adr, and it is broken on some ARMv7-M
17108 implementations. */
17109 /* Use the second register of the pair to avoid problematic
17111 otherops
[1] = operands
[1];
17113 output_asm_insn ("adr%?\t%0, %1", otherops
);
17114 operands
[1] = otherops
[0];
17118 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
17120 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
17127 /* ??? This needs checking for thumb2. */
17129 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
17130 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
17132 otherops
[0] = operands
[0];
17133 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
17134 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
17136 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
17138 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17140 switch ((int) INTVAL (otherops
[2]))
17144 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
17150 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
17156 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
17160 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
17161 operands
[1] = otherops
[0];
17163 && (REG_P (otherops
[2])
17165 || (CONST_INT_P (otherops
[2])
17166 && INTVAL (otherops
[2]) > -256
17167 && INTVAL (otherops
[2]) < 256)))
17169 if (reg_overlap_mentioned_p (operands
[0],
17172 /* Swap base and index registers over to
17173 avoid a conflict. */
17174 std::swap (otherops
[1], otherops
[2]);
17176 /* If both registers conflict, it will usually
17177 have been fixed by a splitter. */
17178 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
17179 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
17183 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17184 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
17191 otherops
[0] = operands
[0];
17193 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
17198 if (CONST_INT_P (otherops
[2]))
17202 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
17203 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
17205 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17211 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
17217 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
17224 return "ldrd%?\t%0, [%1]";
17226 return "ldmia%?\t%1, %M0";
17230 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
17231 /* Take care of overlapping base/data reg. */
17232 if (reg_mentioned_p (operands
[0], operands
[1]))
17236 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17237 output_asm_insn ("ldr%?\t%0, %1", operands
);
17247 output_asm_insn ("ldr%?\t%0, %1", operands
);
17248 output_asm_insn ("ldr%?\t%0, %1", otherops
);
17258 /* Constraints should ensure this. */
17259 gcc_assert (code0
== MEM
&& code1
== REG
);
17260 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
17261 || (TARGET_ARM
&& TARGET_LDRD
));
17263 switch (GET_CODE (XEXP (operands
[0], 0)))
17269 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
17271 output_asm_insn ("stm%?\t%m0, %M1", operands
);
17276 gcc_assert (TARGET_LDRD
);
17278 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
17285 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
17287 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
17295 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
17297 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
17302 gcc_assert (TARGET_LDRD
);
17304 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
17309 otherops
[0] = operands
[1];
17310 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
17311 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
17313 /* IWMMXT allows offsets larger than ldrd can handle,
17314 fix these up with a pair of ldr. */
17316 && CONST_INT_P (otherops
[2])
17317 && (INTVAL(otherops
[2]) <= -256
17318 || INTVAL(otherops
[2]) >= 256))
17320 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17324 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
17325 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17334 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
17335 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
17341 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
17344 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
17349 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
17354 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
17355 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
17357 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
17361 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
17368 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
17375 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
17380 && (REG_P (otherops
[2])
17382 || (CONST_INT_P (otherops
[2])
17383 && INTVAL (otherops
[2]) > -256
17384 && INTVAL (otherops
[2]) < 256)))
17386 otherops
[0] = operands
[1];
17387 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
17389 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
17395 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
17396 otherops
[1] = operands
[1];
17399 output_asm_insn ("str%?\t%1, %0", operands
);
17400 output_asm_insn ("str%?\t%H1, %0", otherops
);
17410 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17411 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17414 output_move_quad (rtx
*operands
)
17416 if (REG_P (operands
[0]))
17418 /* Load, or reg->reg move. */
17420 if (MEM_P (operands
[1]))
17422 switch (GET_CODE (XEXP (operands
[1], 0)))
17425 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
17430 output_asm_insn ("adr%?\t%0, %1", operands
);
17431 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
17435 gcc_unreachable ();
17443 gcc_assert (REG_P (operands
[1]));
17445 dest
= REGNO (operands
[0]);
17446 src
= REGNO (operands
[1]);
17448 /* This seems pretty dumb, but hopefully GCC won't try to do it
17451 for (i
= 0; i
< 4; i
++)
17453 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
17454 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
17455 output_asm_insn ("mov%?\t%0, %1", ops
);
17458 for (i
= 3; i
>= 0; i
--)
17460 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
17461 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
17462 output_asm_insn ("mov%?\t%0, %1", ops
);
17468 gcc_assert (MEM_P (operands
[0]));
17469 gcc_assert (REG_P (operands
[1]));
17470 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
17472 switch (GET_CODE (XEXP (operands
[0], 0)))
17475 output_asm_insn ("stm%?\t%m0, %M1", operands
);
17479 gcc_unreachable ();
17486 /* Output a VFP load or store instruction. */
17489 output_move_vfp (rtx
*operands
)
17491 rtx reg
, mem
, addr
, ops
[2];
17492 int load
= REG_P (operands
[0]);
17493 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
17494 int sp
= (!TARGET_VFP_FP16INST
17495 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
17496 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
17501 reg
= operands
[!load
];
17502 mem
= operands
[load
];
17504 mode
= GET_MODE (reg
);
17506 gcc_assert (REG_P (reg
));
17507 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
17508 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
17514 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
17515 gcc_assert (MEM_P (mem
));
17517 addr
= XEXP (mem
, 0);
17519 switch (GET_CODE (addr
))
17522 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
17523 ops
[0] = XEXP (addr
, 0);
17528 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
17529 ops
[0] = XEXP (addr
, 0);
17534 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
17540 sprintf (buff
, templ
,
17541 load
? "ld" : "st",
17542 dp
? "64" : sp
? "32" : "16",
17544 integer_p
? "\t%@ int" : "");
17545 output_asm_insn (buff
, ops
);
17550 /* Output a Neon double-word or quad-word load or store, or a load
17551 or store for larger structure modes.
17553 WARNING: The ordering of elements is weird in big-endian mode,
17554 because the EABI requires that vectors stored in memory appear
17555 as though they were stored by a VSTM instruction.
17556 GCC RTL defines element ordering based on in-memory order.
17557 This can be different from the architectural ordering of elements
17558 within a NEON register. The intrinsics defined in arm_neon.h use the
17559 NEON register element ordering, not the GCC RTL element ordering.
17561 For example, the in-memory ordering of a big-endian quadword
17562 vector with 16-bit elements when stored from register pair {d0,d1}
17563 will be (lowest address first, d0[N] is NEON register element N):
17565 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
17567 When necessary, quadword registers (dN, dN+1) are moved to ARM
17568 registers from rN in the order:
17570 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
17572 So that STM/LDM can be used on vectors in ARM registers, and the
17573 same memory layout will result as if VSTM/VLDM were used.
17575 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
17576 possible, which allows use of appropriate alignment tags.
17577 Note that the choice of "64" is independent of the actual vector
17578 element size; this size simply ensures that the behavior is
17579 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
17581 Due to limitations of those instructions, use of VST1.64/VLD1.64
17582 is not possible if:
17583 - the address contains PRE_DEC, or
17584 - the mode refers to more than 4 double-word registers
17586 In those cases, it would be possible to replace VSTM/VLDM by a
17587 sequence of instructions; this is not currently implemented since
17588 this is not certain to actually improve performance. */
17591 output_move_neon (rtx
*operands
)
17593 rtx reg
, mem
, addr
, ops
[2];
17594 int regno
, nregs
, load
= REG_P (operands
[0]);
17599 reg
= operands
[!load
];
17600 mem
= operands
[load
];
17602 mode
= GET_MODE (reg
);
17604 gcc_assert (REG_P (reg
));
17605 regno
= REGNO (reg
);
17606 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
17607 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
17608 || NEON_REGNO_OK_FOR_QUAD (regno
));
17609 gcc_assert (VALID_NEON_DREG_MODE (mode
)
17610 || VALID_NEON_QREG_MODE (mode
)
17611 || VALID_NEON_STRUCT_MODE (mode
));
17612 gcc_assert (MEM_P (mem
));
17614 addr
= XEXP (mem
, 0);
17616 /* Strip off const from addresses like (const (plus (...))). */
17617 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
17618 addr
= XEXP (addr
, 0);
17620 switch (GET_CODE (addr
))
17623 /* We have to use vldm / vstm for too-large modes. */
17626 templ
= "v%smia%%?\t%%0!, %%h1";
17627 ops
[0] = XEXP (addr
, 0);
17631 templ
= "v%s1.64\t%%h1, %%A0";
17638 /* We have to use vldm / vstm in this case, since there is no
17639 pre-decrement form of the vld1 / vst1 instructions. */
17640 templ
= "v%smdb%%?\t%%0!, %%h1";
17641 ops
[0] = XEXP (addr
, 0);
17646 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
17647 gcc_unreachable ();
17650 /* We have to use vldm / vstm for too-large modes. */
17654 templ
= "v%smia%%?\t%%m0, %%h1";
17656 templ
= "v%s1.64\t%%h1, %%A0";
17662 /* Fall through. */
17668 for (i
= 0; i
< nregs
; i
++)
17670 /* We're only using DImode here because it's a convenient size. */
17671 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
17672 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
17673 if (reg_overlap_mentioned_p (ops
[0], mem
))
17675 gcc_assert (overlap
== -1);
17680 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
17681 output_asm_insn (buff
, ops
);
17686 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
17687 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
17688 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
17689 output_asm_insn (buff
, ops
);
17696 gcc_unreachable ();
17699 sprintf (buff
, templ
, load
? "ld" : "st");
17700 output_asm_insn (buff
, ops
);
17705 /* Compute and return the length of neon_mov<mode>, where <mode> is
17706 one of VSTRUCT modes: EI, OI, CI or XI. */
17708 arm_attr_length_move_neon (rtx_insn
*insn
)
17710 rtx reg
, mem
, addr
;
17714 extract_insn_cached (insn
);
17716 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
17718 mode
= GET_MODE (recog_data
.operand
[0]);
17729 gcc_unreachable ();
17733 load
= REG_P (recog_data
.operand
[0]);
17734 reg
= recog_data
.operand
[!load
];
17735 mem
= recog_data
.operand
[load
];
17737 gcc_assert (MEM_P (mem
));
17739 mode
= GET_MODE (reg
);
17740 addr
= XEXP (mem
, 0);
17742 /* Strip off const from addresses like (const (plus (...))). */
17743 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
17744 addr
= XEXP (addr
, 0);
17746 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
17748 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
17755 /* Return nonzero if the offset in the address is an immediate. Otherwise,
17759 arm_address_offset_is_imm (rtx_insn
*insn
)
17763 extract_insn_cached (insn
);
17765 if (REG_P (recog_data
.operand
[0]))
17768 mem
= recog_data
.operand
[0];
17770 gcc_assert (MEM_P (mem
));
17772 addr
= XEXP (mem
, 0);
17775 || (GET_CODE (addr
) == PLUS
17776 && REG_P (XEXP (addr
, 0))
17777 && CONST_INT_P (XEXP (addr
, 1))))
17783 /* Output an ADD r, s, #n where n may be too big for one instruction.
17784 If adding zero to one register, output nothing. */
17786 output_add_immediate (rtx
*operands
)
17788 HOST_WIDE_INT n
= INTVAL (operands
[2]);
17790 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
17793 output_multi_immediate (operands
,
17794 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
17797 output_multi_immediate (operands
,
17798 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
17805 /* Output a multiple immediate operation.
17806 OPERANDS is the vector of operands referred to in the output patterns.
17807 INSTR1 is the output pattern to use for the first constant.
17808 INSTR2 is the output pattern to use for subsequent constants.
17809 IMMED_OP is the index of the constant slot in OPERANDS.
17810 N is the constant value. */
17811 static const char *
17812 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
17813 int immed_op
, HOST_WIDE_INT n
)
17815 #if HOST_BITS_PER_WIDE_INT > 32
17821 /* Quick and easy output. */
17822 operands
[immed_op
] = const0_rtx
;
17823 output_asm_insn (instr1
, operands
);
17828 const char * instr
= instr1
;
17830 /* Note that n is never zero here (which would give no output). */
17831 for (i
= 0; i
< 32; i
+= 2)
17835 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
17836 output_asm_insn (instr
, operands
);
17846 /* Return the name of a shifter operation. */
17847 static const char *
17848 arm_shift_nmem(enum rtx_code code
)
17853 return ARM_LSL_NAME
;
17869 /* Return the appropriate ARM instruction for the operation code.
17870 The returned result should not be overwritten. OP is the rtx of the
17871 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
17874 arithmetic_instr (rtx op
, int shift_first_arg
)
17876 switch (GET_CODE (op
))
17882 return shift_first_arg
? "rsb" : "sub";
17897 return arm_shift_nmem(GET_CODE(op
));
17900 gcc_unreachable ();
17904 /* Ensure valid constant shifts and return the appropriate shift mnemonic
17905 for the operation code. The returned result should not be overwritten.
17906 OP is the rtx code of the shift.
17907 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
17909 static const char *
17910 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
17913 enum rtx_code code
= GET_CODE (op
);
17918 if (!CONST_INT_P (XEXP (op
, 1)))
17920 output_operand_lossage ("invalid shift operand");
17925 *amountp
= 32 - INTVAL (XEXP (op
, 1));
17933 mnem
= arm_shift_nmem(code
);
17934 if (CONST_INT_P (XEXP (op
, 1)))
17936 *amountp
= INTVAL (XEXP (op
, 1));
17938 else if (REG_P (XEXP (op
, 1)))
17945 output_operand_lossage ("invalid shift operand");
17951 /* We never have to worry about the amount being other than a
17952 power of 2, since this case can never be reloaded from a reg. */
17953 if (!CONST_INT_P (XEXP (op
, 1)))
17955 output_operand_lossage ("invalid shift operand");
17959 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
17961 /* Amount must be a power of two. */
17962 if (*amountp
& (*amountp
- 1))
17964 output_operand_lossage ("invalid shift operand");
17968 *amountp
= exact_log2 (*amountp
);
17969 gcc_assert (IN_RANGE (*amountp
, 0, 31));
17970 return ARM_LSL_NAME
;
17973 output_operand_lossage ("invalid shift operand");
17977 /* This is not 100% correct, but follows from the desire to merge
17978 multiplication by a power of 2 with the recognizer for a
17979 shift. >=32 is not a valid shift for "lsl", so we must try and
17980 output a shift that produces the correct arithmetical result.
17981 Using lsr #32 is identical except for the fact that the carry bit
17982 is not set correctly if we set the flags; but we never use the
17983 carry bit from such an operation, so we can ignore that. */
17984 if (code
== ROTATERT
)
17985 /* Rotate is just modulo 32. */
17987 else if (*amountp
!= (*amountp
& 31))
17989 if (code
== ASHIFT
)
17994 /* Shifts of 0 are no-ops. */
18001 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18002 because /bin/as is horribly restrictive. The judgement about
18003 whether or not each character is 'printable' (and can be output as
18004 is) or not (and must be printed with an octal escape) must be made
18005 with reference to the *host* character set -- the situation is
18006 similar to that discussed in the comments above pp_c_char in
18007 c-pretty-print.c. */
18009 #define MAX_ASCII_LEN 51
18012 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18015 int len_so_far
= 0;
18017 fputs ("\t.ascii\t\"", stream
);
18019 for (i
= 0; i
< len
; i
++)
18023 if (len_so_far
>= MAX_ASCII_LEN
)
18025 fputs ("\"\n\t.ascii\t\"", stream
);
18031 if (c
== '\\' || c
== '\"')
18033 putc ('\\', stream
);
18041 fprintf (stream
, "\\%03o", c
);
18046 fputs ("\"\n", stream
);
18049 /* Whether a register is callee saved or not. This is necessary because high
18050 registers are marked as caller saved when optimizing for size on Thumb-1
18051 targets despite being callee saved in order to avoid using them. */
18052 #define callee_saved_reg_p(reg) \
18053 (!call_used_regs[reg] \
18054 || (TARGET_THUMB1 && optimize_size \
18055 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18057 /* Compute the register save mask for registers 0 through 12
18058 inclusive. This code is used by arm_compute_save_reg_mask. */
18060 static unsigned long
18061 arm_compute_save_reg0_reg12_mask (void)
18063 unsigned long func_type
= arm_current_func_type ();
18064 unsigned long save_reg_mask
= 0;
18067 if (IS_INTERRUPT (func_type
))
18069 unsigned int max_reg
;
18070 /* Interrupt functions must not corrupt any registers,
18071 even call clobbered ones. If this is a leaf function
18072 we can just examine the registers used by the RTL, but
18073 otherwise we have to assume that whatever function is
18074 called might clobber anything, and so we have to save
18075 all the call-clobbered registers as well. */
18076 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
18077 /* FIQ handlers have registers r8 - r12 banked, so
18078 we only need to check r0 - r7. Normal ISRs only
18079 bank r14 and r15, so we must check up to r12.
18080 r13 is the stack pointer which is always preserved,
18081 so we do not need to consider it here. */
18086 for (reg
= 0; reg
<= max_reg
; reg
++)
18087 if (df_regs_ever_live_p (reg
)
18088 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
18089 save_reg_mask
|= (1 << reg
);
18091 /* Also save the pic base register if necessary. */
18093 && !TARGET_SINGLE_PIC_BASE
18094 && arm_pic_register
!= INVALID_REGNUM
18095 && crtl
->uses_pic_offset_table
)
18096 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18098 else if (IS_VOLATILE(func_type
))
18100 /* For noreturn functions we historically omitted register saves
18101 altogether. However this really messes up debugging. As a
18102 compromise save just the frame pointers. Combined with the link
18103 register saved elsewhere this should be sufficient to get
18105 if (frame_pointer_needed
)
18106 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18107 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
18108 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18109 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
18110 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
18114 /* In the normal case we only need to save those registers
18115 which are call saved and which are used by this function. */
18116 for (reg
= 0; reg
<= 11; reg
++)
18117 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
18118 save_reg_mask
|= (1 << reg
);
18120 /* Handle the frame pointer as a special case. */
18121 if (frame_pointer_needed
)
18122 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
18124 /* If we aren't loading the PIC register,
18125 don't stack it even though it may be live. */
18127 && !TARGET_SINGLE_PIC_BASE
18128 && arm_pic_register
!= INVALID_REGNUM
18129 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
18130 || crtl
->uses_pic_offset_table
))
18131 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18133 /* The prologue will copy SP into R0, so save it. */
18134 if (IS_STACKALIGN (func_type
))
18135 save_reg_mask
|= 1;
18138 /* Save registers so the exception handler can modify them. */
18139 if (crtl
->calls_eh_return
)
18145 reg
= EH_RETURN_DATA_REGNO (i
);
18146 if (reg
== INVALID_REGNUM
)
18148 save_reg_mask
|= 1 << reg
;
18152 return save_reg_mask
;
18155 /* Return true if r3 is live at the start of the function. */
18158 arm_r3_live_at_start_p (void)
18160 /* Just look at cfg info, which is still close enough to correct at this
18161 point. This gives false positives for broken functions that might use
18162 uninitialized data that happens to be allocated in r3, but who cares? */
18163 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
18166 /* Compute the number of bytes used to store the static chain register on the
18167 stack, above the stack frame. We need to know this accurately to get the
18168 alignment of the rest of the stack frame correct. */
18171 arm_compute_static_chain_stack_bytes (void)
18173 /* See the defining assertion in arm_expand_prologue. */
18174 if (IS_NESTED (arm_current_func_type ())
18175 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
18176 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
18177 && !df_regs_ever_live_p (LR_REGNUM
)))
18178 && arm_r3_live_at_start_p ()
18179 && crtl
->args
.pretend_args_size
== 0)
18185 /* Compute a bit mask of which registers need to be
18186 saved on the stack for the current function.
18187 This is used by arm_get_frame_offsets, which may add extra registers. */
18189 static unsigned long
18190 arm_compute_save_reg_mask (void)
18192 unsigned int save_reg_mask
= 0;
18193 unsigned long func_type
= arm_current_func_type ();
18196 if (IS_NAKED (func_type
))
18197 /* This should never really happen. */
18200 /* If we are creating a stack frame, then we must save the frame pointer,
18201 IP (which will hold the old stack pointer), LR and the PC. */
18202 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
18204 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
18207 | (1 << PC_REGNUM
);
18209 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
18211 /* Decide if we need to save the link register.
18212 Interrupt routines have their own banked link register,
18213 so they never need to save it.
18214 Otherwise if we do not use the link register we do not need to save
18215 it. If we are pushing other registers onto the stack however, we
18216 can save an instruction in the epilogue by pushing the link register
18217 now and then popping it back into the PC. This incurs extra memory
18218 accesses though, so we only do it when optimizing for size, and only
18219 if we know that we will not need a fancy return sequence. */
18220 if (df_regs_ever_live_p (LR_REGNUM
)
18223 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
18224 && !crtl
->tail_call_emit
18225 && !crtl
->calls_eh_return
))
18226 save_reg_mask
|= 1 << LR_REGNUM
;
18228 if (cfun
->machine
->lr_save_eliminated
)
18229 save_reg_mask
&= ~ (1 << LR_REGNUM
);
18231 if (TARGET_REALLY_IWMMXT
18232 && ((bit_count (save_reg_mask
)
18233 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
18234 arm_compute_static_chain_stack_bytes())
18237 /* The total number of registers that are going to be pushed
18238 onto the stack is odd. We need to ensure that the stack
18239 is 64-bit aligned before we start to save iWMMXt registers,
18240 and also before we start to create locals. (A local variable
18241 might be a double or long long which we will load/store using
18242 an iWMMXt instruction). Therefore we need to push another
18243 ARM register, so that the stack will be 64-bit aligned. We
18244 try to avoid using the arg registers (r0 -r3) as they might be
18245 used to pass values in a tail call. */
18246 for (reg
= 4; reg
<= 12; reg
++)
18247 if ((save_reg_mask
& (1 << reg
)) == 0)
18251 save_reg_mask
|= (1 << reg
);
18254 cfun
->machine
->sibcall_blocked
= 1;
18255 save_reg_mask
|= (1 << 3);
18259 /* We may need to push an additional register for use initializing the
18260 PIC base register. */
18261 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
18262 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
18264 reg
= thumb_find_work_register (1 << 4);
18265 if (!call_used_regs
[reg
])
18266 save_reg_mask
|= (1 << reg
);
18269 return save_reg_mask
;
/* NOTE(review): this region is a corrupted extraction of the original
   source -- statements are split across lines, the upstream file's own
   line numbers are fused into the text, and some lines are missing
   entirely.  Code kept byte-identical; re-sync with upstream GCC
   arm.c before making any functional edit.  */
/* Returns: mask of core registers the Thumb-1 prologue must push
   (callee-saved regs, possibly the PIC register, r11 for interworking
   slots, LR, and a low work register for stack decrement).  */
18272 /* Compute a bit mask of which registers need to be
18273 saved on the stack for the current function. */
18274 static unsigned long
18275 thumb1_compute_save_reg_mask (void)
18277 unsigned long mask
;
18281 for (reg
= 0; reg
< 12; reg
++)
18282 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
18286 && !TARGET_SINGLE_PIC_BASE
18287 && arm_pic_register
!= INVALID_REGNUM
18288 && crtl
->uses_pic_offset_table
)
18289 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
18291 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18292 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
18293 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
18295 /* LR will also be pushed if any lo regs are pushed. */
18296 if (mask
& 0xff || thumb_force_lr_save ())
18297 mask
|= (1 << LR_REGNUM
);
18299 /* Make sure we have a low work register if we need one.
18300 We will need one if we are going to push a high register,
18301 but we are not currently intending to push a low register. */
18302 if ((mask
& 0xff) == 0
18303 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
18305 /* Use thumb_find_work_register to choose which register
18306 we will use. If the register is live then we will
18307 have to push it. Use LAST_LO_REGNUM as our fallback
18308 choice for the register to select. */
18309 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
18310 /* Make sure the register returned by thumb_find_work_register is
18311 not part of the return value. */
18312 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
18313 reg
= LAST_LO_REGNUM
;
18315 if (callee_saved_reg_p (reg
))
18319 /* The 504 below is 8 bytes less than 512 because there are two possible
18320 alignment words. We can't tell here if they will be present or not so we
18321 have to play it safe and assume that they are. */
18322 if ((CALLER_INTERWORKING_SLOT_SIZE
+
18323 ROUND_UP_WORD (get_frame_size ()) +
18324 crtl
->outgoing_args_size
) >= 504)
18326 /* This is the same as the code in thumb1_expand_prologue() which
18327 determines which register to use for stack decrement. */
18328 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
18329 if (mask
& (1 << reg
))
18332 if (reg
> LAST_LO_REGNUM
)
18334 /* Make sure we have a register available for stack decrement. */
18335 mask
|= 1 << LAST_LO_REGNUM
;
/* NOTE(review): corrupted extraction -- statements split across lines,
   upstream line numbers fused in, some lines missing (including the
   return-type line and loop structure).  Kept byte-identical; restore
   from upstream GCC arm.c before editing.  */
/* Walks the VFP register file in pairs, summing 8 bytes per saved
   register, with the ARM10 VFPr1 errata forcing an extra slot when a
   run of exactly two registers is saved on pre-v6 cores.  */
18343 /* Return the number of bytes required to save VFP registers. */
18345 arm_get_vfp_saved_size (void)
18347 unsigned int regno
;
18352 /* Space for saved VFP registers. */
18353 if (TARGET_HARD_FLOAT
)
18356 for (regno
= FIRST_VFP_REGNUM
;
18357 regno
< LAST_VFP_REGNUM
;
18360 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
18361 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
18365 /* Workaround ARM10 VFPr1 bug. */
18366 if (count
== 2 && !arm_arch6
)
18368 saved
+= count
* 8;
18377 if (count
== 2 && !arm_arch6
)
18379 saved
+= count
* 8;
/* NOTE(review): corrupted extraction -- statements split across lines,
   upstream line numbers fused in, many lines missing (conditions,
   braces, some case labels).  Code kept byte-identical; the assembler
   template strings below are emitted verbatim into the output and must
   never be edited casually.  Re-sync with upstream GCC arm.c first.  */
18386 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18387 everything bar the final return instruction. If simple_return is true,
18388 then do not output epilogue, because it has already been emitted in RTL. */
18390 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
18391 bool simple_return
)
18393 char conditional
[10];
18396 unsigned long live_regs_mask
;
18397 unsigned long func_type
;
18398 arm_stack_offsets
*offsets
;
18400 func_type
= arm_current_func_type ();
18402 if (IS_NAKED (func_type
))
18405 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
18407 /* If this function was declared non-returning, and we have
18408 found a tail call, then we have to trust that the called
18409 function won't return. */
18414 /* Otherwise, trap an attempted return by aborting. */
18416 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
18418 assemble_external_libcall (ops
[1]);
18419 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
18425 gcc_assert (!cfun
->calls_alloca
|| really_return
);
18427 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
18429 cfun
->machine
->return_used_this_function
= 1;
18431 offsets
= arm_get_frame_offsets ();
18432 live_regs_mask
= offsets
->saved_regs_mask
;
18434 if (!simple_return
&& live_regs_mask
)
18436 const char * return_reg
;
18438 /* If we do not have any special requirements for function exit
18439 (e.g. interworking) then we can load the return address
18440 directly into the PC. Otherwise we must load it into LR. */
18442 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
18443 return_reg
= reg_names
[PC_REGNUM
];
18445 return_reg
= reg_names
[LR_REGNUM
];
18447 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
18449 /* There are three possible reasons for the IP register
18450 being saved. 1) a stack frame was created, in which case
18451 IP contains the old stack pointer, or 2) an ISR routine
18452 corrupted it, or 3) it was saved to align the stack on
18453 iWMMXt. In case 1, restore IP into SP, otherwise just
18455 if (frame_pointer_needed
)
18457 live_regs_mask
&= ~ (1 << IP_REGNUM
);
18458 live_regs_mask
|= (1 << SP_REGNUM
);
18461 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
18464 /* On some ARM architectures it is faster to use LDR rather than
18465 LDM to load a single register. On other architectures, the
18466 cost is the same. In 26 bit mode, or for exception handlers,
18467 we have to use LDM to load the PC so that the CPSR is also
18469 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
18470 if (live_regs_mask
== (1U << reg
))
18473 if (reg
<= LAST_ARM_REGNUM
18474 && (reg
!= LR_REGNUM
18476 || ! IS_INTERRUPT (func_type
)))
18478 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
18479 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
18486 /* Generate the load multiple instruction to restore the
18487 registers. Note we can get here, even if
18488 frame_pointer_needed is true, but only if sp already
18489 points to the base of the saved core registers. */
18490 if (live_regs_mask
& (1 << SP_REGNUM
))
18492 unsigned HOST_WIDE_INT stack_adjust
;
18494 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
18495 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
18497 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
18498 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
18501 /* If we can't use ldmib (SA110 bug),
18502 then try to pop r3 instead. */
18504 live_regs_mask
|= 1 << 3;
18506 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
18509 /* For interrupt returns we have to use an LDM rather than
18510 a POP so that we can use the exception return variant. */
18511 else if (IS_INTERRUPT (func_type
))
18512 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
18514 sprintf (instr
, "pop%s\t{", conditional
);
18516 p
= instr
+ strlen (instr
);
18518 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
18519 if (live_regs_mask
& (1 << reg
))
18521 int l
= strlen (reg_names
[reg
]);
18527 memcpy (p
, ", ", 2);
18531 memcpy (p
, "%|", 2);
18532 memcpy (p
+ 2, reg_names
[reg
], l
);
18536 if (live_regs_mask
& (1 << LR_REGNUM
))
18538 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
18539 /* If returning from an interrupt, restore the CPSR. */
18540 if (IS_INTERRUPT (func_type
))
18547 output_asm_insn (instr
, & operand
);
18549 /* See if we need to generate an extra instruction to
18550 perform the actual function return. */
18552 && func_type
!= ARM_FT_INTERWORKED
18553 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
18555 /* The return has already been handled
18556 by loading the LR into the PC. */
18563 switch ((int) ARM_FUNC_TYPE (func_type
))
18567 /* ??? This is wrong for unified assembly syntax. */
18568 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
18571 case ARM_FT_INTERWORKED
:
18572 gcc_assert (arm_arch5
|| arm_arch4t
);
18573 sprintf (instr
, "bx%s\t%%|lr", conditional
);
18576 case ARM_FT_EXCEPTION
:
18577 /* ??? This is wrong for unified assembly syntax. */
18578 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
18582 /* Use bx if it's available. */
18583 if (arm_arch5
|| arm_arch4t
)
18584 sprintf (instr
, "bx%s\t%%|lr", conditional
);
18586 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
18590 output_asm_insn (instr
, & operand
);
18596 /* Write the function name into the code section, directly preceding
18597 the function prologue.
18599 Code will be output similar to this:
18601 .ascii "arm_poke_function_name", 0
18604 .word 0xff000000 + (t1 - t0)
18605 arm_poke_function_name
18607 stmfd sp!, {fp, ip, lr, pc}
18610 When performing a stack backtrace, code can inspect the value
18611 of 'pc' stored at 'fp' + 0. If the trace function then looks
18612 at location pc - 12 and the top 8 bits are set, then we know
18613 that there is a function name embedded immediately preceding this
18614 location and has length ((pc[-3]) & 0xff000000).
18616 We assume that pc is declared as a pointer to an unsigned long.
18618 It is of no benefit to output the function name if we are assembling
18619 a leaf function. These function types will not contain a stack
18620 backtrace structure, therefore it is not possible to determine the
18623 arm_poke_function_name (FILE *stream
, const char *name
)
18625 unsigned long alignlength
;
18626 unsigned long length
;
18629 length
= strlen (name
) + 1;
18630 alignlength
= ROUND_UP_WORD (length
);
18632 ASM_OUTPUT_ASCII (stream
, name
, length
);
18633 ASM_OUTPUT_ALIGN (stream
, 2);
18634 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
18635 assemble_aligned_integer (UNITS_PER_WORD
, x
);
/* NOTE(review): corrupted extraction -- statements split across lines,
   upstream line numbers fused in, some lines missing (several switch
   case labels and break statements are absent here).  Code kept
   byte-identical; re-sync with upstream GCC arm.c before editing.  */
/* Emits only assembler comments (no instructions): a description of
   the current function's type and frame layout, for humans reading
   the generated assembly.  */
18638 /* Place some comments into the assembler stream
18639 describing the current function. */
18641 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
18643 unsigned long func_type
;
18645 /* ??? Do we want to print some of the below anyway? */
18649 /* Sanity check. */
18650 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
18652 func_type
= arm_current_func_type ();
18654 switch ((int) ARM_FUNC_TYPE (func_type
))
18657 case ARM_FT_NORMAL
:
18659 case ARM_FT_INTERWORKED
:
18660 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
18663 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
18666 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
18668 case ARM_FT_EXCEPTION
:
18669 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
18673 if (IS_NAKED (func_type
))
18674 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
18676 if (IS_VOLATILE (func_type
))
18677 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
18679 if (IS_NESTED (func_type
))
18680 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
18681 if (IS_STACKALIGN (func_type
))
18682 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
18684 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
18686 crtl
->args
.pretend_args_size
, frame_size
);
18688 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
18689 frame_pointer_needed
,
18690 cfun
->machine
->uses_anonymous_args
);
18692 if (cfun
->machine
->lr_save_eliminated
)
18693 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
18695 if (crtl
->calls_eh_return
)
18696 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
/* NOTE(review): corrupted extraction -- statements split across lines,
   upstream line numbers fused in, some lines missing (the TARGET_THUMB
   branch header and several braces are absent here).  Code kept
   byte-identical; re-sync with upstream GCC arm.c before editing.  */
/* Per-function epilogue hook: emits the v4t call-via-reg trampolines
   collected during compilation, resets bookkeeping, and on 32-bit
   targets sanity-checks the frame offsets against use_return_insn.  */
18701 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
18702 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
18704 arm_stack_offsets
*offsets
;
18710 /* Emit any call-via-reg trampolines that are needed for v4t support
18711 of call_reg and call_value_reg type insns. */
18712 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
18714 rtx label
= cfun
->machine
->call_via
[regno
];
18718 switch_to_section (function_section (current_function_decl
));
18719 targetm
.asm_out
.internal_label (asm_out_file
, "L",
18720 CODE_LABEL_NUMBER (label
));
18721 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
18725 /* ??? Probably not safe to set this here, since it assumes that a
18726 function will be emitted as assembly immediately after we generate
18727 RTL for it. This does not happen for inline functions. */
18728 cfun
->machine
->return_used_this_function
= 0;
18730 else /* TARGET_32BIT */
18732 /* We need to take into account any stack-frame rounding. */
18733 offsets
= arm_get_frame_offsets ();
18735 gcc_assert (!use_return_insn (FALSE
, NULL
)
18736 || (cfun
->machine
->return_used_this_function
!= 0)
18737 || offsets
->saved_regs
== offsets
->outgoing_args
18738 || frame_pointer_needed
);
/* NOTE(review): corrupted extraction -- statements split across lines,
   upstream line numbers fused in, many lines missing (offsets passed to
   plus_constant, loop increments, braces).  The RTL/DWARF sequencing
   below is order-sensitive; code kept byte-identical.  Re-sync with
   upstream GCC arm.c before editing.  */
18742 /* Generate and emit a sequence of insns equivalent to PUSH, but using
18743 STR and STRD. If an even number of registers are being pushed, one
18744 or more STRD patterns are created for each register pair. If an
18745 odd number of registers are pushed, emit an initial STR followed by
18746 as many STRD instructions as are needed. This works best when the
18747 stack is initially 64-bit aligned (the normal case), since it
18748 ensures that each STRD is also 64-bit aligned. */
18750 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
18755 rtx par
= NULL_RTX
;
18756 rtx dwarf
= NULL_RTX
;
18760 num_regs
= bit_count (saved_regs_mask
);
18762 /* Must be at least one register to save, and can't save SP or PC. */
18763 gcc_assert (num_regs
> 0 && num_regs
<= 14);
18764 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
18765 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
18767 /* Create sequence for DWARF info. All the frame-related data for
18768 debugging is held in this wrapper. */
18769 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
18771 /* Describe the stack adjustment. */
18772 tmp
= gen_rtx_SET (stack_pointer_rtx
,
18773 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
18774 RTX_FRAME_RELATED_P (tmp
) = 1;
18775 XVECEXP (dwarf
, 0, 0) = tmp
;
18777 /* Find the first register. */
18778 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
18783 /* If there's an odd number of registers to push. Start off by
18784 pushing a single register. This ensures that subsequent strd
18785 operations are dword aligned (assuming that SP was originally
18786 64-bit aligned). */
18787 if ((num_regs
& 1) != 0)
18789 rtx reg
, mem
, insn
;
18791 reg
= gen_rtx_REG (SImode
, regno
);
18793 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
18794 stack_pointer_rtx
));
18796 mem
= gen_frame_mem (Pmode
,
18798 (Pmode
, stack_pointer_rtx
,
18799 plus_constant (Pmode
, stack_pointer_rtx
,
18802 tmp
= gen_rtx_SET (mem
, reg
);
18803 RTX_FRAME_RELATED_P (tmp
) = 1;
18804 insn
= emit_insn (tmp
);
18805 RTX_FRAME_RELATED_P (insn
) = 1;
18806 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
18807 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
18808 RTX_FRAME_RELATED_P (tmp
) = 1;
18811 XVECEXP (dwarf
, 0, i
) = tmp
;
18815 while (i
< num_regs
)
18816 if (saved_regs_mask
& (1 << regno
))
18818 rtx reg1
, reg2
, mem1
, mem2
;
18819 rtx tmp0
, tmp1
, tmp2
;
18822 /* Find the register to pair with this one. */
18823 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
18827 reg1
= gen_rtx_REG (SImode
, regno
);
18828 reg2
= gen_rtx_REG (SImode
, regno2
);
18835 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18838 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18840 -4 * (num_regs
- 1)));
18841 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
18842 plus_constant (Pmode
, stack_pointer_rtx
,
18844 tmp1
= gen_rtx_SET (mem1
, reg1
);
18845 tmp2
= gen_rtx_SET (mem2
, reg2
);
18846 RTX_FRAME_RELATED_P (tmp0
) = 1;
18847 RTX_FRAME_RELATED_P (tmp1
) = 1;
18848 RTX_FRAME_RELATED_P (tmp2
) = 1;
18849 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
18850 XVECEXP (par
, 0, 0) = tmp0
;
18851 XVECEXP (par
, 0, 1) = tmp1
;
18852 XVECEXP (par
, 0, 2) = tmp2
;
18853 insn
= emit_insn (par
);
18854 RTX_FRAME_RELATED_P (insn
) = 1;
18855 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
18859 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18862 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
18865 tmp1
= gen_rtx_SET (mem1
, reg1
);
18866 tmp2
= gen_rtx_SET (mem2
, reg2
);
18867 RTX_FRAME_RELATED_P (tmp1
) = 1;
18868 RTX_FRAME_RELATED_P (tmp2
) = 1;
18869 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
18870 XVECEXP (par
, 0, 0) = tmp1
;
18871 XVECEXP (par
, 0, 1) = tmp2
;
18875 /* Create unwind information. This is an approximation. */
18876 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
18877 plus_constant (Pmode
,
18881 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
18882 plus_constant (Pmode
,
18887 RTX_FRAME_RELATED_P (tmp1
) = 1;
18888 RTX_FRAME_RELATED_P (tmp2
) = 1;
18889 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
18890 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
18892 regno
= regno2
+ 1;
/* NOTE(review): corrupted extraction -- statements split across lines,
   upstream line numbers fused in, many lines missing (the even-register
   test, several plus_constant offsets, loop increments).  Code kept
   byte-identical; re-sync with upstream GCC arm.c before editing.  */
18900 /* STRD in ARM mode requires consecutive registers. This function emits STRD
18901 whenever possible, otherwise it emits single-word stores. The first store
18902 also allocates stack space for all saved registers, using writeback with
18903 post-addressing mode. All other stores use offset addressing. If no STRD
18904 can be emitted, this function emits a sequence of single-word stores,
18905 and not an STM as before, because single-word stores provide more freedom
18906 scheduling and can be turned into an STM by peephole optimizations. */
18908 arm_emit_strd_push (unsigned long saved_regs_mask
)
18911 int i
, j
, dwarf_index
= 0;
18913 rtx dwarf
= NULL_RTX
;
18914 rtx insn
= NULL_RTX
;
18917 /* TODO: A more efficient code can be emitted by changing the
18918 layout, e.g., first push all pairs that can use STRD to keep the
18919 stack aligned, and then push all other registers. */
18920 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
18921 if (saved_regs_mask
& (1 << i
))
18924 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
18925 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
18926 gcc_assert (num_regs
> 0);
18928 /* Create sequence for DWARF info. */
18929 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
18931 /* For dwarf info, we generate explicit stack update. */
18932 tmp
= gen_rtx_SET (stack_pointer_rtx
,
18933 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
18934 RTX_FRAME_RELATED_P (tmp
) = 1;
18935 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
18937 /* Save registers. */
18938 offset
= - 4 * num_regs
;
18940 while (j
<= LAST_ARM_REGNUM
)
18941 if (saved_regs_mask
& (1 << j
))
18944 && (saved_regs_mask
& (1 << (j
+ 1))))
18946 /* Current register and previous register form register pair for
18947 which STRD can be generated. */
18950 /* Allocate stack space for all saved registers. */
18951 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
18952 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
18953 mem
= gen_frame_mem (DImode
, tmp
);
18956 else if (offset
> 0)
18957 mem
= gen_frame_mem (DImode
,
18958 plus_constant (Pmode
,
18962 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
18964 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
18965 RTX_FRAME_RELATED_P (tmp
) = 1;
18966 tmp
= emit_insn (tmp
);
18968 /* Record the first store insn. */
18969 if (dwarf_index
== 1)
18972 /* Generate dwarf info. */
18973 mem
= gen_frame_mem (SImode
,
18974 plus_constant (Pmode
,
18977 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
18978 RTX_FRAME_RELATED_P (tmp
) = 1;
18979 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
18981 mem
= gen_frame_mem (SImode
,
18982 plus_constant (Pmode
,
18985 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
18986 RTX_FRAME_RELATED_P (tmp
) = 1;
18987 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
18994 /* Emit a single word store. */
18997 /* Allocate stack space for all saved registers. */
18998 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
18999 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19000 mem
= gen_frame_mem (SImode
, tmp
);
19003 else if (offset
> 0)
19004 mem
= gen_frame_mem (SImode
,
19005 plus_constant (Pmode
,
19009 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19011 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19012 RTX_FRAME_RELATED_P (tmp
) = 1;
19013 tmp
= emit_insn (tmp
);
19015 /* Record the first store insn. */
19016 if (dwarf_index
== 1)
19019 /* Generate dwarf info. */
19020 mem
= gen_frame_mem (SImode
,
19021 plus_constant(Pmode
,
19024 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
19025 RTX_FRAME_RELATED_P (tmp
) = 1;
19026 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19035 /* Attach dwarf info to the first insn we generate. */
19036 gcc_assert (insn
!= NULL_RTX
);
19037 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19038 RTX_FRAME_RELATED_P (insn
) = 1;
/* NOTE(review): corrupted extraction -- statements split across lines,
   upstream line numbers fused in, lines missing (return type, loop-body
   braces, several operands of the nested gen_rtx calls).  Code kept
   byte-identical; re-sync with upstream GCC arm.c before editing.  */
19041 /* Generate and emit an insn that we will recognize as a push_multi.
19042 Unfortunately, since this insn does not reflect very well the actual
19043 semantics of the operation, we need to annotate the insn for the benefit
19044 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19045 MASK for registers that should be annotated for DWARF2 frame unwind
19048 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
19051 int num_dwarf_regs
= 0;
19055 int dwarf_par_index
;
19058 /* We don't record the PC in the dwarf frame information. */
19059 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
19061 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19063 if (mask
& (1 << i
))
19065 if (dwarf_regs_mask
& (1 << i
))
19069 gcc_assert (num_regs
&& num_regs
<= 16);
19070 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
19072 /* For the body of the insn we are going to generate an UNSPEC in
19073 parallel with several USEs. This allows the insn to be recognized
19074 by the push_multi pattern in the arm.md file.
19076 The body of the insn looks something like this:
19079 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19080 (const_int:SI <num>)))
19081 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19087 For the frame note however, we try to be more explicit and actually
19088 show each register being stored into the stack frame, plus a (single)
19089 decrement of the stack pointer. We do it this way in order to be
19090 friendly to the stack unwinding code, which only wants to see a single
19091 stack decrement per instruction. The RTL we generate for the note looks
19092 something like this:
19095 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19096 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19097 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19098 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19102 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19103 instead we'd have a parallel expression detailing all
19104 the stores to the various memory addresses so that debug
19105 information is more up-to-date. Remember however while writing
19106 this to take care of the constraints with the push instruction.
19108 Note also that this has to be taken care of for the VFP registers.
19110 For more see PR43399. */
19112 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
19113 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
19114 dwarf_par_index
= 1;
19116 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19118 if (mask
& (1 << i
))
19120 reg
= gen_rtx_REG (SImode
, i
);
19122 XVECEXP (par
, 0, 0)
19123 = gen_rtx_SET (gen_frame_mem
19125 gen_rtx_PRE_MODIFY (Pmode
,
19128 (Pmode
, stack_pointer_rtx
,
19131 gen_rtx_UNSPEC (BLKmode
,
19132 gen_rtvec (1, reg
),
19133 UNSPEC_PUSH_MULT
));
19135 if (dwarf_regs_mask
& (1 << i
))
19137 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
19139 RTX_FRAME_RELATED_P (tmp
) = 1;
19140 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19147 for (j
= 1, i
++; j
< num_regs
; i
++)
19149 if (mask
& (1 << i
))
19151 reg
= gen_rtx_REG (SImode
, i
);
19153 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
19155 if (dwarf_regs_mask
& (1 << i
))
19158 = gen_rtx_SET (gen_frame_mem
19160 plus_constant (Pmode
, stack_pointer_rtx
,
19163 RTX_FRAME_RELATED_P (tmp
) = 1;
19164 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
19171 par
= emit_insn (par
);
19173 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19174 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19175 RTX_FRAME_RELATED_P (tmp
) = 1;
19176 XVECEXP (dwarf
, 0, 0) = tmp
;
19178 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19183 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19184 SIZE is the offset to be adjusted.
19185 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19187 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
19191 RTX_FRAME_RELATED_P (insn
) = 1;
19192 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
19193 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
/* NOTE(review): corrupted extraction -- statements split across lines,
   upstream line numbers fused in, lines missing (return type, the
   return_in_pc branch headers, braces, a gen_frame_mem operand).
   Code kept byte-identical; re-sync with upstream GCC arm.c before
   editing.  */
19196 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19197 SAVED_REGS_MASK shows which registers need to be restored.
19199 Unfortunately, since this insn does not reflect very well the actual
19200 semantics of the operation, we need to annotate the insn for the benefit
19201 of DWARF2 frame unwind information. */
19203 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
19208 rtx dwarf
= NULL_RTX
;
19210 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
19214 offset_adj
= return_in_pc
? 1 : 0;
19215 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19216 if (saved_regs_mask
& (1 << i
))
19219 gcc_assert (num_regs
&& num_regs
<= 16);
19221 /* If SP is in reglist, then we don't emit SP update insn. */
19222 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
19224 /* The parallel needs to hold num_regs SETs
19225 and one SET for the stack update. */
19226 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
19229 XVECEXP (par
, 0, 0) = ret_rtx
;
19233 /* Increment the stack pointer, based on there being
19234 num_regs 4-byte registers to restore. */
19235 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19236 plus_constant (Pmode
,
19239 RTX_FRAME_RELATED_P (tmp
) = 1;
19240 XVECEXP (par
, 0, offset_adj
) = tmp
;
19243 /* Now restore every reg, which may include PC. */
19244 for (j
= 0, i
= 0; j
< num_regs
; i
++)
19245 if (saved_regs_mask
& (1 << i
))
19247 reg
= gen_rtx_REG (SImode
, i
);
19248 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
19250 /* Emit single load with writeback. */
19251 tmp
= gen_frame_mem (SImode
,
19252 gen_rtx_POST_INC (Pmode
,
19253 stack_pointer_rtx
));
19254 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
19255 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19259 tmp
= gen_rtx_SET (reg
,
19262 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
19263 RTX_FRAME_RELATED_P (tmp
) = 1;
19264 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
19266 /* We need to maintain a sequence for DWARF info too. As dwarf info
19267 should not have PC, skip PC. */
19268 if (i
!= PC_REGNUM
)
19269 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19275 par
= emit_jump_insn (par
);
19277 par
= emit_insn (par
);
19279 REG_NOTES (par
) = dwarf
;
19281 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
19282 stack_pointer_rtx
, stack_pointer_rtx
);
/* NOTE(review): corrupted extraction -- statements split across lines,
   upstream line numbers fused in, lines missing (return type, the
   ARM10-workaround and >16-register branch bodies, braces).  Code kept
   byte-identical; re-sync with upstream GCC arm.c before editing.  */
19285 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19286 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19288 Unfortunately, since this insn does not reflect very well the actual
19289 semantics of the operation, we need to annotate the insn for the benefit
19290 of DWARF2 frame unwind information. */
19292 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
19296 rtx dwarf
= NULL_RTX
;
19299 gcc_assert (num_regs
&& num_regs
<= 32);
19301 /* Workaround ARM10 VFPr1 bug. */
19302 if (num_regs
== 2 && !arm_arch6
)
19304 if (first_reg
== 15)
19310 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19311 there could be up to 32 D-registers to restore.
19312 If there are more than 16 D-registers, make two recursive calls,
19313 each of which emits one pop_multi instruction. */
19316 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
19317 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
19321 /* The parallel needs to hold num_regs SETs
19322 and one SET for the stack update. */
19323 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19325 /* Increment the stack pointer, based on there being
19326 num_regs 8-byte registers to restore. */
19327 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
19328 RTX_FRAME_RELATED_P (tmp
) = 1;
19329 XVECEXP (par
, 0, 0) = tmp
;
19331 /* Now show every reg that will be restored, using a SET for each. */
19332 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
19334 reg
= gen_rtx_REG (DFmode
, i
);
19336 tmp
= gen_rtx_SET (reg
,
19339 plus_constant (Pmode
, base_reg
, 8 * j
)));
19340 RTX_FRAME_RELATED_P (tmp
) = 1;
19341 XVECEXP (par
, 0, j
+ 1) = tmp
;
19343 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19348 par
= emit_insn (par
);
19349 REG_NOTES (par
) = dwarf
;
19351 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP. */
19352 if (REGNO (base_reg
) == IP_REGNUM
)
19354 RTX_FRAME_RELATED_P (par
) = 1;
19355 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
19358 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
19359 base_reg
, base_reg
);
19362 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
19363 number of registers are being popped, multiple LDRD patterns are created for
19364 all register pairs. If odd number of registers are popped, last register is
19365 loaded by using LDR pattern. */
19367 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
19371 rtx par
= NULL_RTX
;
19372 rtx dwarf
= NULL_RTX
;
19373 rtx tmp
, reg
, tmp1
;
19374 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
19376 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19377 if (saved_regs_mask
& (1 << i
))
19380 gcc_assert (num_regs
&& num_regs
<= 16);
19382 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19383 to be popped. So, if num_regs is even, now it will become odd,
19384 and we can generate pop with PC. If num_regs is odd, it will be
19385 even now, and ldr with return can be generated for PC. */
19389 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19391 /* Var j iterates over all the registers to gather all the registers in
19392 saved_regs_mask. Var i gives index of saved registers in stack frame.
19393 A PARALLEL RTX of register-pair is created here, so that pattern for
19394 LDRD can be matched. As PC is always last register to be popped, and
19395 we have already decremented num_regs if PC, we don't have to worry
19396 about PC in this loop. */
19397 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
19398 if (saved_regs_mask
& (1 << j
))
19400 /* Create RTX for memory load. */
19401 reg
= gen_rtx_REG (SImode
, j
);
19402 tmp
= gen_rtx_SET (reg
,
19403 gen_frame_mem (SImode
,
19404 plus_constant (Pmode
,
19405 stack_pointer_rtx
, 4 * i
)));
19406 RTX_FRAME_RELATED_P (tmp
) = 1;
19410 /* When saved-register index (i) is even, the RTX to be emitted is
19411 yet to be created. Hence create it first. The LDRD pattern we
19412 are generating is :
19413 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19414 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19415 where target registers need not be consecutive. */
19416 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19420 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19421 added as 0th element and if i is odd, reg_i is added as 1st element
19422 of LDRD pattern shown above. */
19423 XVECEXP (par
, 0, (i
% 2)) = tmp
;
19424 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19428 /* When saved-register index (i) is odd, RTXs for both the registers
19429 to be loaded are generated in above given LDRD pattern, and the
19430 pattern can be emitted now. */
19431 par
= emit_insn (par
);
19432 REG_NOTES (par
) = dwarf
;
19433 RTX_FRAME_RELATED_P (par
) = 1;
19439 /* If the number of registers pushed is odd AND return_in_pc is false OR
19440 number of registers are even AND return_in_pc is true, last register is
19441 popped using LDR. It can be PC as well. Hence, adjust the stack first and
19442 then LDR with post increment. */
19444 /* Increment the stack pointer, based on there being
19445 num_regs 4-byte registers to restore. */
19446 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19447 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
19448 RTX_FRAME_RELATED_P (tmp
) = 1;
19449 tmp
= emit_insn (tmp
);
19452 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
19453 stack_pointer_rtx
, stack_pointer_rtx
);
19458 if (((num_regs
% 2) == 1 && !return_in_pc
)
19459 || ((num_regs
% 2) == 0 && return_in_pc
))
19461 /* Scan for the single register to be popped. Skip until the saved
19462 register is found. */
19463 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
19465 /* Gen LDR with post increment here. */
19466 tmp1
= gen_rtx_MEM (SImode
,
19467 gen_rtx_POST_INC (SImode
,
19468 stack_pointer_rtx
));
19469 set_mem_alias_set (tmp1
, get_frame_alias_set ());
19471 reg
= gen_rtx_REG (SImode
, j
);
19472 tmp
= gen_rtx_SET (reg
, tmp1
);
19473 RTX_FRAME_RELATED_P (tmp
) = 1;
19474 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
19478 /* If return_in_pc, j must be PC_REGNUM. */
19479 gcc_assert (j
== PC_REGNUM
);
19480 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19481 XVECEXP (par
, 0, 0) = ret_rtx
;
19482 XVECEXP (par
, 0, 1) = tmp
;
19483 par
= emit_jump_insn (par
);
19487 par
= emit_insn (tmp
);
19488 REG_NOTES (par
) = dwarf
;
19489 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
19490 stack_pointer_rtx
, stack_pointer_rtx
);
19494 else if ((num_regs
% 2) == 1 && return_in_pc
)
19496 /* There are 2 registers to be popped. So, generate the pattern
19497 pop_multiple_with_stack_update_and_return to pop in PC. */
19498 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
19504 /* LDRD in ARM mode needs consecutive registers as operands. This function
19505 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
19506 offset addressing and then generates one separate stack udpate. This provides
19507 more scheduling freedom, compared to writeback on every load. However,
19508 if the function returns using load into PC directly
19509 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
19510 before the last load. TODO: Add a peephole optimization to recognize
19511 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
19512 peephole optimization to merge the load at stack-offset zero
19513 with the stack update instruction using load with writeback
19514 in post-index addressing mode. */
19516 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
19520 rtx par
= NULL_RTX
;
19521 rtx dwarf
= NULL_RTX
;
19524 /* Restore saved registers. */
19525 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
19527 while (j
<= LAST_ARM_REGNUM
)
19528 if (saved_regs_mask
& (1 << j
))
19531 && (saved_regs_mask
& (1 << (j
+ 1)))
19532 && (j
+ 1) != PC_REGNUM
)
19534 /* Current register and next register form register pair for which
19535 LDRD can be generated. PC is always the last register popped, and
19536 we handle it separately. */
19538 mem
= gen_frame_mem (DImode
,
19539 plus_constant (Pmode
,
19543 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19545 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
19546 tmp
= emit_insn (tmp
);
19547 RTX_FRAME_RELATED_P (tmp
) = 1;
19549 /* Generate dwarf info. */
19551 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
19552 gen_rtx_REG (SImode
, j
),
19554 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
19555 gen_rtx_REG (SImode
, j
+ 1),
19558 REG_NOTES (tmp
) = dwarf
;
19563 else if (j
!= PC_REGNUM
)
19565 /* Emit a single word load. */
19567 mem
= gen_frame_mem (SImode
,
19568 plus_constant (Pmode
,
19572 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
19574 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
19575 tmp
= emit_insn (tmp
);
19576 RTX_FRAME_RELATED_P (tmp
) = 1;
19578 /* Generate dwarf info. */
19579 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
19580 gen_rtx_REG (SImode
, j
),
19586 else /* j == PC_REGNUM */
19592 /* Update the stack. */
19595 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19596 plus_constant (Pmode
,
19599 tmp
= emit_insn (tmp
);
19600 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
19601 stack_pointer_rtx
, stack_pointer_rtx
);
19605 if (saved_regs_mask
& (1 << PC_REGNUM
))
19607 /* Only PC is to be popped. */
19608 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19609 XVECEXP (par
, 0, 0) = ret_rtx
;
19610 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
19611 gen_frame_mem (SImode
,
19612 gen_rtx_POST_INC (SImode
,
19613 stack_pointer_rtx
)));
19614 RTX_FRAME_RELATED_P (tmp
) = 1;
19615 XVECEXP (par
, 0, 1) = tmp
;
19616 par
= emit_jump_insn (par
);
19618 /* Generate dwarf info. */
19619 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
19620 gen_rtx_REG (SImode
, PC_REGNUM
),
19622 REG_NOTES (par
) = dwarf
;
19623 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
19624 stack_pointer_rtx
, stack_pointer_rtx
);
19628 /* Calculate the size of the return value that is passed in registers. */
19630 arm_size_return_regs (void)
19634 if (crtl
->return_rtx
!= 0)
19635 mode
= GET_MODE (crtl
->return_rtx
);
19637 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
19639 return GET_MODE_SIZE (mode
);
19642 /* Return true if the current function needs to save/restore LR. */
19644 thumb_force_lr_save (void)
19646 return !cfun
->machine
->lr_save_eliminated
19647 && (!leaf_function_p ()
19648 || thumb_far_jump_used_p ()
19649 || df_regs_ever_live_p (LR_REGNUM
));
19652 /* We do not know if r3 will be available because
19653 we do have an indirect tailcall happening in this
19654 particular case. */
19656 is_indirect_tailcall_p (rtx call
)
19658 rtx pat
= PATTERN (call
);
19660 /* Indirect tail call. */
19661 pat
= XVECEXP (pat
, 0, 0);
19662 if (GET_CODE (pat
) == SET
)
19663 pat
= SET_SRC (pat
);
19665 pat
= XEXP (XEXP (pat
, 0), 0);
19666 return REG_P (pat
);
19669 /* Return true if r3 is used by any of the tail call insns in the
19670 current function. */
19672 any_sibcall_could_use_r3 (void)
19677 if (!crtl
->tail_call_emit
)
19679 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
19680 if (e
->flags
& EDGE_SIBCALL
)
19682 rtx_insn
*call
= BB_END (e
->src
);
19683 if (!CALL_P (call
))
19684 call
= prev_nonnote_nondebug_insn (call
);
19685 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
19686 if (find_regno_fusage (call
, USE
, 3)
19687 || is_indirect_tailcall_p (call
))
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
19743 /* Calculate stack offsets. These are used to calculate register elimination
19744 offsets and in prologue/epilogue code. Also calculates which registers
19745 should be saved. */
19747 static arm_stack_offsets
*
19748 arm_get_frame_offsets (void)
19750 struct arm_stack_offsets
*offsets
;
19751 unsigned long func_type
;
19755 HOST_WIDE_INT frame_size
;
19758 offsets
= &cfun
->machine
->stack_offsets
;
19760 /* We need to know if we are a leaf function. Unfortunately, it
19761 is possible to be called after start_sequence has been called,
19762 which causes get_insns to return the insns for the sequence,
19763 not the function, which will cause leaf_function_p to return
19764 the incorrect result.
19766 to know about leaf functions once reload has completed, and the
19767 frame size cannot be changed after that time, so we can safely
19768 use the cached value. */
19770 if (reload_completed
)
19773 /* Initially this is the size of the local variables. It will translated
19774 into an offset once we have determined the size of preceding data. */
19775 frame_size
= ROUND_UP_WORD (get_frame_size ());
19777 leaf
= leaf_function_p ();
19779 /* Space for variadic functions. */
19780 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
19782 /* In Thumb mode this is incorrect, but never used. */
19784 = (offsets
->saved_args
19785 + arm_compute_static_chain_stack_bytes ()
19786 + (frame_pointer_needed
? 4 : 0));
19790 unsigned int regno
;
19792 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
19793 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
19794 saved
= core_saved
;
19796 /* We know that SP will be doubleword aligned on entry, and we must
19797 preserve that condition at any subroutine call. We also require the
19798 soft frame pointer to be doubleword aligned. */
19800 if (TARGET_REALLY_IWMMXT
)
19802 /* Check for the call-saved iWMMXt registers. */
19803 for (regno
= FIRST_IWMMXT_REGNUM
;
19804 regno
<= LAST_IWMMXT_REGNUM
;
19806 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
19810 func_type
= arm_current_func_type ();
19811 /* Space for saved VFP registers. */
19812 if (! IS_VOLATILE (func_type
)
19813 && TARGET_HARD_FLOAT
)
19814 saved
+= arm_get_vfp_saved_size ();
19816 else /* TARGET_THUMB1 */
19818 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
19819 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
19820 saved
= core_saved
;
19821 if (TARGET_BACKTRACE
)
19825 /* Saved registers include the stack frame. */
19826 offsets
->saved_regs
19827 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
19828 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
19830 /* A leaf function does not need any stack alignment if it has nothing
19832 if (leaf
&& frame_size
== 0
19833 /* However if it calls alloca(), we have a dynamically allocated
19834 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
19835 && ! cfun
->calls_alloca
)
19837 offsets
->outgoing_args
= offsets
->soft_frame
;
19838 offsets
->locals_base
= offsets
->soft_frame
;
19842 /* Ensure SFP has the correct alignment. */
19843 if (ARM_DOUBLEWORD_ALIGN
19844 && (offsets
->soft_frame
& 7))
19846 offsets
->soft_frame
+= 4;
19847 /* Try to align stack by pushing an extra reg. Don't bother doing this
19848 when there is a stack frame as the alignment will be rolled into
19849 the normal stack adjustment. */
19850 if (frame_size
+ crtl
->outgoing_args_size
== 0)
19854 /* Register r3 is caller-saved. Normally it does not need to be
19855 saved on entry by the prologue. However if we choose to save
19856 it for padding then we may confuse the compiler into thinking
19857 a prologue sequence is required when in fact it is not. This
19858 will occur when shrink-wrapping if r3 is used as a scratch
19859 register and there are no other callee-saved writes.
19861 This situation can be avoided when other callee-saved registers
19862 are available and r3 is not mandatory if we choose a callee-saved
19863 register for padding. */
19864 bool prefer_callee_reg_p
= false;
19866 /* If it is safe to use r3, then do so. This sometimes
19867 generates better code on Thumb-2 by avoiding the need to
19868 use 32-bit push/pop instructions. */
19869 if (! any_sibcall_could_use_r3 ()
19870 && arm_size_return_regs () <= 12
19871 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
19873 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
19876 if (!TARGET_THUMB2
)
19877 prefer_callee_reg_p
= true;
19880 || prefer_callee_reg_p
)
19882 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
19884 /* Avoid fixed registers; they may be changed at
19885 arbitrary times so it's unsafe to restore them
19886 during the epilogue. */
19888 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
19898 offsets
->saved_regs
+= 4;
19899 offsets
->saved_regs_mask
|= (1 << reg
);
19904 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
19905 offsets
->outgoing_args
= (offsets
->locals_base
19906 + crtl
->outgoing_args_size
);
19908 if (ARM_DOUBLEWORD_ALIGN
)
19910 /* Ensure SP remains doubleword aligned. */
19911 if (offsets
->outgoing_args
& 7)
19912 offsets
->outgoing_args
+= 4;
19913 gcc_assert (!(offsets
->outgoing_args
& 7));
19920 /* Calculate the relative offsets for the different stack pointers. Positive
19921 offsets are in the direction of stack growth. */
19924 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
19926 arm_stack_offsets
*offsets
;
19928 offsets
= arm_get_frame_offsets ();
19930 /* OK, now we have enough information to compute the distances.
19931 There must be an entry in these switch tables for each pair
19932 of registers in ELIMINABLE_REGS, even if some of the entries
19933 seem to be redundant or useless. */
19936 case ARG_POINTER_REGNUM
:
19939 case THUMB_HARD_FRAME_POINTER_REGNUM
:
19942 case FRAME_POINTER_REGNUM
:
19943 /* This is the reverse of the soft frame pointer
19944 to hard frame pointer elimination below. */
19945 return offsets
->soft_frame
- offsets
->saved_args
;
19947 case ARM_HARD_FRAME_POINTER_REGNUM
:
19948 /* This is only non-zero in the case where the static chain register
19949 is stored above the frame. */
19950 return offsets
->frame
- offsets
->saved_args
- 4;
19952 case STACK_POINTER_REGNUM
:
19953 /* If nothing has been pushed on the stack at all
19954 then this will return -4. This *is* correct! */
19955 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
19958 gcc_unreachable ();
19960 gcc_unreachable ();
19962 case FRAME_POINTER_REGNUM
:
19965 case THUMB_HARD_FRAME_POINTER_REGNUM
:
19968 case ARM_HARD_FRAME_POINTER_REGNUM
:
19969 /* The hard frame pointer points to the top entry in the
19970 stack frame. The soft frame pointer to the bottom entry
19971 in the stack frame. If there is no stack frame at all,
19972 then they are identical. */
19974 return offsets
->frame
- offsets
->soft_frame
;
19976 case STACK_POINTER_REGNUM
:
19977 return offsets
->outgoing_args
- offsets
->soft_frame
;
19980 gcc_unreachable ();
19982 gcc_unreachable ();
19985 /* You cannot eliminate from the stack pointer.
19986 In theory you could eliminate from the hard frame
19987 pointer to the stack pointer, but this will never
19988 happen, since if a stack frame is not needed the
19989 hard frame pointer will never be used. */
19990 gcc_unreachable ();
19994 /* Given FROM and TO register numbers, say whether this elimination is
19995 allowed. Frame pointer elimination is automatically handled.
19997 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
19998 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
19999 pointer, we must eliminate FRAME_POINTER_REGNUM into
20000 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20001 ARG_POINTER_REGNUM. */
20004 arm_can_eliminate (const int from
, const int to
)
20006 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
20007 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
20008 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
20009 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
20013 /* Emit RTL to save coprocessor registers on function entry. Returns the
20014 number of bytes pushed. */
20017 arm_save_coproc_regs(void)
20019 int saved_size
= 0;
20021 unsigned start_reg
;
20024 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
20025 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
20027 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20028 insn
= gen_rtx_MEM (V2SImode
, insn
);
20029 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
20030 RTX_FRAME_RELATED_P (insn
) = 1;
20034 if (TARGET_HARD_FLOAT
)
20036 start_reg
= FIRST_VFP_REGNUM
;
20038 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
20040 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
20041 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
20043 if (start_reg
!= reg
)
20044 saved_size
+= vfp_emit_fstmd (start_reg
,
20045 (reg
- start_reg
) / 2);
20046 start_reg
= reg
+ 2;
20049 if (start_reg
!= reg
)
20050 saved_size
+= vfp_emit_fstmd (start_reg
,
20051 (reg
- start_reg
) / 2);
20057 /* Set the Thumb frame pointer from the stack pointer. */
20060 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
20062 HOST_WIDE_INT amount
;
20065 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
20067 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20068 stack_pointer_rtx
, GEN_INT (amount
)));
20071 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
20072 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20073 expects the first two operands to be the same. */
20076 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20078 hard_frame_pointer_rtx
));
20082 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20083 hard_frame_pointer_rtx
,
20084 stack_pointer_rtx
));
20086 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
20087 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
20088 RTX_FRAME_RELATED_P (dwarf
) = 1;
20089 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20092 RTX_FRAME_RELATED_P (insn
) = 1;
20095 struct scratch_reg
{
20100 /* Return a short-lived scratch register for use as a 2nd scratch register on
20101 function entry after the registers are saved in the prologue. This register
20102 must be released by means of release_scratch_register_on_entry. IP is not
20103 considered since it is always used as the 1st scratch register if available.
20105 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
20106 mask of live registers. */
20109 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
20110 unsigned long live_regs
)
20116 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
20122 for (i
= 4; i
< 11; i
++)
20123 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
20131 /* If IP is used as the 1st scratch register for a nested function,
20132 then either r3 wasn't available or is used to preserve IP. */
20133 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
20135 regno
= (regno1
== 3 ? 2 : 3);
20137 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
20142 sr
->reg
= gen_rtx_REG (SImode
, regno
);
20145 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20146 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
20147 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
20148 plus_constant (Pmode
, stack_pointer_rtx
, -4));
20149 RTX_FRAME_RELATED_P (insn
) = 1;
20150 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
20154 /* Release a scratch register obtained from the preceding function. */
20157 release_scratch_register_on_entry (struct scratch_reg
*sr
)
20161 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
20162 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
20163 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
20164 plus_constant (Pmode
, stack_pointer_rtx
, 4));
20165 RTX_FRAME_RELATED_P (insn
) = 1;
20166 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
20170 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
20172 #if PROBE_INTERVAL > 4096
20173 #error Cannot use indexed addressing mode for stack probing
20176 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
20177 inclusive. These are offsets from the current stack pointer. REGNO1
20178 is the index number of the 1st scratch register and LIVE_REGS is the
20179 mask of live registers. */
20182 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
20183 unsigned int regno1
, unsigned long live_regs
)
20185 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
20187 /* See if we have a constant small number of probes to generate. If so,
20188 that's the easy case. */
20189 if (size
<= PROBE_INTERVAL
)
20191 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
20192 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
20193 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
20196 /* The run-time loop is made up of 10 insns in the generic case while the
20197 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
20198 else if (size
<= 5 * PROBE_INTERVAL
)
20200 HOST_WIDE_INT i
, rem
;
20202 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
20203 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
20204 emit_stack_probe (reg1
);
20206 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
20207 it exceeds SIZE. If only two probes are needed, this will not
20208 generate any code. Then probe at FIRST + SIZE. */
20209 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
20211 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
20212 emit_stack_probe (reg1
);
20215 rem
= size
- (i
- PROBE_INTERVAL
);
20216 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
20218 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
20219 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
20222 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
20225 /* Otherwise, do the same as above, but in a loop. Note that we must be
20226 extra careful with variables wrapping around because we might be at
20227 the very top (or the very bottom) of the address space and we have
20228 to be able to handle this case properly; in particular, we use an
20229 equality test for the loop condition. */
20232 HOST_WIDE_INT rounded_size
;
20233 struct scratch_reg sr
;
20235 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
20237 emit_move_insn (reg1
, GEN_INT (first
));
20240 /* Step 1: round SIZE to the previous multiple of the interval. */
20242 rounded_size
= size
& -PROBE_INTERVAL
;
20243 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
20246 /* Step 2: compute initial and final value of the loop counter. */
20248 /* TEST_ADDR = SP + FIRST. */
20249 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
20251 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
20252 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
20255 /* Step 3: the loop
20259 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
20262 while (TEST_ADDR != LAST_ADDR)
20264 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
20265 until it is equal to ROUNDED_SIZE. */
20267 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
20270 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
20271 that SIZE is equal to ROUNDED_SIZE. */
20273 if (size
!= rounded_size
)
20275 HOST_WIDE_INT rem
= size
- rounded_size
;
20277 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
20279 emit_set_insn (sr
.reg
,
20280 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
20281 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
20282 PROBE_INTERVAL
- rem
));
20285 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
20288 release_scratch_register_on_entry (&sr
);
20291 /* Make sure nothing is scheduled before we are done. */
20292 emit_insn (gen_blockage ());
20295 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
20296 absolute addresses. */
20299 output_probe_stack_range (rtx reg1
, rtx reg2
)
20301 static int labelno
= 0;
20305 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
20308 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
20310 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
20312 xops
[1] = GEN_INT (PROBE_INTERVAL
);
20313 output_asm_insn ("sub\t%0, %0, %1", xops
);
20315 /* Probe at TEST_ADDR. */
20316 output_asm_insn ("str\tr0, [%0, #0]", xops
);
20318 /* Test if TEST_ADDR == LAST_ADDR. */
20320 output_asm_insn ("cmp\t%0, %1", xops
);
20323 fputs ("\tbne\t", asm_out_file
);
20324 assemble_name_raw (asm_out_file
, loop_lab
);
20325 fputc ('\n', asm_out_file
);
20330 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20333 arm_expand_prologue (void)
20338 unsigned long live_regs_mask
;
20339 unsigned long func_type
;
20341 int saved_pretend_args
= 0;
20342 int saved_regs
= 0;
20343 unsigned HOST_WIDE_INT args_to_push
;
20344 HOST_WIDE_INT size
;
20345 arm_stack_offsets
*offsets
;
20348 func_type
= arm_current_func_type ();
20350 /* Naked functions don't have prologues. */
20351 if (IS_NAKED (func_type
))
20353 if (flag_stack_usage_info
)
20354 current_function_static_stack_size
= 0;
20358 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20359 args_to_push
= crtl
->args
.pretend_args_size
;
20361 /* Compute which register we will have to save onto the stack. */
20362 offsets
= arm_get_frame_offsets ();
20363 live_regs_mask
= offsets
->saved_regs_mask
;
20365 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
20367 if (IS_STACKALIGN (func_type
))
20371 /* Handle a word-aligned stack pointer. We generate the following:
20376 <save and restore r0 in normal prologue/epilogue>
20380 The unwinder doesn't need to know about the stack realignment.
20381 Just tell it we saved SP in r0. */
20382 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
20384 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
20385 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
20387 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
20388 RTX_FRAME_RELATED_P (insn
) = 1;
20389 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
20391 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
20393 /* ??? The CFA changes here, which may cause GDB to conclude that it
20394 has entered a different function. That said, the unwind info is
20395 correct, individually, before and after this instruction because
20396 we've described the save of SP, which will override the default
20397 handling of SP as restoring from the CFA. */
20398 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
20401 /* The static chain register is the same as the IP register. If it is
20402 clobbered when creating the frame, we need to save and restore it. */
20403 clobber_ip
= IS_NESTED (func_type
)
20404 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20405 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
20406 && !df_regs_ever_live_p (LR_REGNUM
)
20407 && arm_r3_live_at_start_p ()));
20409 /* Find somewhere to store IP whilst the frame is being created.
20410 We try the following places in order:
20412 1. The last argument register r3 if it is available.
20413 2. A slot on the stack above the frame if there are no
20414 arguments to push onto the stack.
20415 3. Register r3 again, after pushing the argument registers
20416 onto the stack, if this is a varargs function.
20417 4. The last slot on the stack created for the arguments to
20418 push, if this isn't a varargs function.
20420 Note - we only need to tell the dwarf2 backend about the SP
20421 adjustment in the second variant; the static chain register
20422 doesn't need to be unwound, as it doesn't contain a value
20423 inherited from the caller. */
20426 if (!arm_r3_live_at_start_p ())
20427 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20428 else if (args_to_push
== 0)
20432 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20435 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20436 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
20439 /* Just tell the dwarf backend that we adjusted SP. */
20440 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
20441 plus_constant (Pmode
, stack_pointer_rtx
,
20443 RTX_FRAME_RELATED_P (insn
) = 1;
20444 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20448 /* Store the args on the stack. */
20449 if (cfun
->machine
->uses_anonymous_args
)
20451 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
20452 (0xf0 >> (args_to_push
/ 4)) & 0xf);
20453 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
20454 saved_pretend_args
= 1;
20460 if (args_to_push
== 4)
20461 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
20463 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
20464 plus_constant (Pmode
,
20468 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
20470 /* Just tell the dwarf backend that we adjusted SP. */
20471 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
20472 plus_constant (Pmode
, stack_pointer_rtx
,
20474 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20477 RTX_FRAME_RELATED_P (insn
) = 1;
20478 fp_offset
= args_to_push
;
20483 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
20485 if (IS_INTERRUPT (func_type
))
20487 /* Interrupt functions must not corrupt any registers.
20488 Creating a frame pointer however, corrupts the IP
20489 register, so we must push it first. */
20490 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
20492 /* Do not set RTX_FRAME_RELATED_P on this insn.
20493 The dwarf stack unwinding code only wants to see one
20494 stack decrement per function, and this is not it. If
20495 this instruction is labeled as being part of the frame
20496 creation sequence then dwarf2out_frame_debug_expr will
20497 die when it encounters the assignment of IP to FP
20498 later on, since the use of SP here establishes SP as
20499 the CFA register and not IP.
20501 Anyway this instruction is not really part of the stack
20502 frame creation although it is part of the prologue. */
20505 insn
= emit_set_insn (ip_rtx
,
20506 plus_constant (Pmode
, stack_pointer_rtx
,
20508 RTX_FRAME_RELATED_P (insn
) = 1;
20513 /* Push the argument registers, or reserve space for them. */
20514 if (cfun
->machine
->uses_anonymous_args
)
20515 insn
= emit_multi_reg_push
20516 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
20517 (0xf0 >> (args_to_push
/ 4)) & 0xf);
20520 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20521 GEN_INT (- args_to_push
)));
20522 RTX_FRAME_RELATED_P (insn
) = 1;
20525 /* If this is an interrupt service routine, and the link register
20526 is going to be pushed, and we're not generating extra
20527 push of IP (needed when frame is needed and frame layout if apcs),
20528 subtracting four from LR now will mean that the function return
20529 can be done with a single instruction. */
20530 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
20531 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
20532 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
20535 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
20537 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
20540 if (live_regs_mask
)
20542 unsigned long dwarf_regs_mask
= live_regs_mask
;
20544 saved_regs
+= bit_count (live_regs_mask
) * 4;
20545 if (optimize_size
&& !frame_pointer_needed
20546 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
20548 /* If no coprocessor registers are being pushed and we don't have
20549 to worry about a frame pointer then push extra registers to
20550 create the stack frame. This is done is a way that does not
20551 alter the frame layout, so is independent of the epilogue. */
20555 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
20557 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
20558 if (frame
&& n
* 4 >= frame
)
20561 live_regs_mask
|= (1 << n
) - 1;
20562 saved_regs
+= frame
;
20567 && current_tune
->prefer_ldrd_strd
20568 && !optimize_function_for_size_p (cfun
))
20570 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
20572 thumb2_emit_strd_push (live_regs_mask
);
20573 else if (TARGET_ARM
20574 && !TARGET_APCS_FRAME
20575 && !IS_INTERRUPT (func_type
))
20576 arm_emit_strd_push (live_regs_mask
);
20579 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
20580 RTX_FRAME_RELATED_P (insn
) = 1;
20585 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
20586 RTX_FRAME_RELATED_P (insn
) = 1;
20590 if (! IS_VOLATILE (func_type
))
20591 saved_regs
+= arm_save_coproc_regs ();
20593 if (frame_pointer_needed
&& TARGET_ARM
)
20595 /* Create the new frame pointer. */
20596 if (TARGET_APCS_FRAME
)
20598 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
20599 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
20600 RTX_FRAME_RELATED_P (insn
) = 1;
20604 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
20605 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
20606 stack_pointer_rtx
, insn
));
20607 RTX_FRAME_RELATED_P (insn
) = 1;
20611 size
= offsets
->outgoing_args
- offsets
->saved_args
;
20612 if (flag_stack_usage_info
)
20613 current_function_static_stack_size
= size
;
20615 /* If this isn't an interrupt service routine and we have a frame, then do
20616 stack checking. We use IP as the first scratch register, except for the
20617 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
20618 if (!IS_INTERRUPT (func_type
)
20619 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
20621 unsigned int regno
;
20623 if (!IS_NESTED (func_type
) || clobber_ip
)
20625 else if (df_regs_ever_live_p (LR_REGNUM
))
20630 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
20632 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
20633 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
20634 size
- STACK_CHECK_PROTECT
,
20635 regno
, live_regs_mask
);
20638 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
20639 regno
, live_regs_mask
);
20642 /* Recover the static chain register. */
20645 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
20646 insn
= gen_rtx_REG (SImode
, 3);
20649 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
20650 insn
= gen_frame_mem (SImode
, insn
);
20652 emit_set_insn (ip_rtx
, insn
);
20653 emit_insn (gen_force_register_use (ip_rtx
));
20656 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
20658 /* This add can produce multiple insns for a large constant, so we
20659 need to get tricky. */
20660 rtx_insn
*last
= get_last_insn ();
20662 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
20663 - offsets
->outgoing_args
);
20665 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20669 last
= last
? NEXT_INSN (last
) : get_insns ();
20670 RTX_FRAME_RELATED_P (last
) = 1;
20672 while (last
!= insn
);
20674 /* If the frame pointer is needed, emit a special barrier that
20675 will prevent the scheduler from moving stores to the frame
20676 before the stack adjustment. */
20677 if (frame_pointer_needed
)
20678 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
20679 hard_frame_pointer_rtx
));
20683 if (frame_pointer_needed
&& TARGET_THUMB2
)
20684 thumb_set_frame_pointer (offsets
);
20686 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20688 unsigned long mask
;
20690 mask
= live_regs_mask
;
20691 mask
&= THUMB2_WORK_REGS
;
20692 if (!IS_NESTED (func_type
))
20693 mask
|= (1 << IP_REGNUM
);
20694 arm_load_pic_register (mask
);
20697 /* If we are profiling, make sure no instructions are scheduled before
20698 the call to mcount. Similarly if the user has requested no
20699 scheduling in the prolog. Similarly if we want non-call exceptions
20700 using the EABI unwinder, to prevent faulting instructions from being
20701 swapped with a stack adjustment. */
20702 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
20703 || (arm_except_unwind_info (&global_options
) == UI_TARGET
20704 && cfun
->can_throw_non_call_exceptions
))
20705 emit_insn (gen_blockage ());
20707 /* If the link register is being kept alive, with the return address in it,
20708 then make sure that it does not get reused by the ce2 pass. */
20709 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
20710 cfun
->machine
->lr_save_eliminated
= 1;
20713 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20715 arm_print_condition (FILE *stream
)
20717 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
20719 /* Branch conversion is not implemented for Thumb-2. */
20722 output_operand_lossage ("predicated Thumb instruction");
20725 if (current_insn_predicate
!= NULL
)
20727 output_operand_lossage
20728 ("predicated instruction in conditional sequence");
20732 fputs (arm_condition_codes
[arm_current_cc
], stream
);
20734 else if (current_insn_predicate
)
20736 enum arm_cond_code code
;
20740 output_operand_lossage ("predicated Thumb instruction");
20744 code
= get_arm_condition_code (current_insn_predicate
);
20745 fputs (arm_condition_codes
[code
], stream
);
20750 /* Globally reserved letters: acln
20751 Puncutation letters currently used: @_|?().!#
20752 Lower case letters currently used: bcdefhimpqtvwxyz
20753 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
20754 Letters previously used, but now deprecated/obsolete: sVWXYZ.
20756 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
20758 If CODE is 'd', then the X is a condition operand and the instruction
20759 should only be executed if the condition is true.
20760 if CODE is 'D', then the X is a condition operand and the instruction
20761 should only be executed if the condition is false: however, if the mode
20762 of the comparison is CCFPEmode, then always execute the instruction -- we
20763 do this because in these circumstances !GE does not necessarily imply LT;
20764 in these cases the instruction pattern will take care to make sure that
20765 an instruction containing %d will follow, thereby undoing the effects of
20766 doing this instruction unconditionally.
20767 If CODE is 'N' then X is a floating point operand that must be negated
20769 If CODE is 'B' then output a bitwise inverted value of X (a const int).
20770 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
20772 arm_print_operand (FILE *stream
, rtx x
, int code
)
20777 fputs (ASM_COMMENT_START
, stream
);
20781 fputs (user_label_prefix
, stream
);
20785 fputs (REGISTER_PREFIX
, stream
);
20789 arm_print_condition (stream
);
20793 /* The current condition code for a condition code setting instruction.
20794 Preceded by 's' in unified syntax, otherwise followed by 's'. */
20795 fputc('s', stream
);
20796 arm_print_condition (stream
);
20800 /* If the instruction is conditionally executed then print
20801 the current condition code, otherwise print 's'. */
20802 gcc_assert (TARGET_THUMB2
);
20803 if (current_insn_predicate
)
20804 arm_print_condition (stream
);
20806 fputc('s', stream
);
20809 /* %# is a "break" sequence. It doesn't output anything, but is used to
20810 separate e.g. operand numbers from following text, if that text consists
20811 of further digits which we don't want to be part of the operand
20819 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
20820 fprintf (stream
, "%s", fp_const_from_val (&r
));
20824 /* An integer or symbol address without a preceding # sign. */
20826 switch (GET_CODE (x
))
20829 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
20833 output_addr_const (stream
, x
);
20837 if (GET_CODE (XEXP (x
, 0)) == PLUS
20838 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
20840 output_addr_const (stream
, x
);
20843 /* Fall through. */
20846 output_operand_lossage ("Unsupported operand for code '%c'", code
);
20850 /* An integer that we want to print in HEX. */
20852 switch (GET_CODE (x
))
20855 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
20859 output_operand_lossage ("Unsupported operand for code '%c'", code
);
20864 if (CONST_INT_P (x
))
20867 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
20868 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
20872 putc ('~', stream
);
20873 output_addr_const (stream
, x
);
20878 /* Print the log2 of a CONST_INT. */
20882 if (!CONST_INT_P (x
)
20883 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
20884 output_operand_lossage ("Unsupported operand for code '%c'", code
);
20886 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
20891 /* The low 16 bits of an immediate constant. */
20892 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
20896 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
20900 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
20908 shift
= shift_op (x
, &val
);
20912 fprintf (stream
, ", %s ", shift
);
20914 arm_print_operand (stream
, XEXP (x
, 1), 0);
20916 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
20921 /* An explanation of the 'Q', 'R' and 'H' register operands:
20923 In a pair of registers containing a DI or DF value the 'Q'
20924 operand returns the register number of the register containing
20925 the least significant part of the value. The 'R' operand returns
20926 the register number of the register containing the most
20927 significant part of the value.
20929 The 'H' operand returns the higher of the two register numbers.
20930 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
20931 same as the 'Q' operand, since the most significant part of the
20932 value is held in the lower number register. The reverse is true
20933 on systems where WORDS_BIG_ENDIAN is false.
20935 The purpose of these operands is to distinguish between cases
20936 where the endian-ness of the values is important (for example
20937 when they are added together), and cases where the endian-ness
20938 is irrelevant, but the order of register operations is important.
20939 For example when loading a value from memory into a register
20940 pair, the endian-ness does not matter. Provided that the value
20941 from the lower memory address is put into the lower numbered
20942 register, and the value from the higher address is put into the
20943 higher numbered register, the load will work regardless of whether
20944 the value being loaded is big-wordian or little-wordian. The
20945 order of the two register loads can matter however, if the address
20946 of the memory location is actually held in one of the registers
20947 being overwritten by the load.
20949 The 'Q' and 'R' constraints are also available for 64-bit
20952 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
20954 rtx part
= gen_lowpart (SImode
, x
);
20955 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
20959 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20961 output_operand_lossage ("invalid operand for code '%c'", code
);
20965 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
20969 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
20971 machine_mode mode
= GET_MODE (x
);
20974 if (mode
== VOIDmode
)
20976 part
= gen_highpart_mode (SImode
, mode
, x
);
20977 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
20981 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20983 output_operand_lossage ("invalid operand for code '%c'", code
);
20987 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
20991 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
20993 output_operand_lossage ("invalid operand for code '%c'", code
);
20997 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
21001 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21003 output_operand_lossage ("invalid operand for code '%c'", code
);
21007 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
21011 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21013 output_operand_lossage ("invalid operand for code '%c'", code
);
21017 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
21021 asm_fprintf (stream
, "%r",
21022 REG_P (XEXP (x
, 0))
21023 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
21027 asm_fprintf (stream
, "{%r-%r}",
21029 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
21032 /* Like 'M', but writing doubleword vector registers, for use by Neon
21036 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
21037 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
21039 asm_fprintf (stream
, "{d%d}", regno
);
21041 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
21046 /* CONST_TRUE_RTX means always -- that's the default. */
21047 if (x
== const_true_rtx
)
21050 if (!COMPARISON_P (x
))
21052 output_operand_lossage ("invalid operand for code '%c'", code
);
21056 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
21061 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21062 want to do that. */
21063 if (x
== const_true_rtx
)
21065 output_operand_lossage ("instruction never executed");
21068 if (!COMPARISON_P (x
))
21070 output_operand_lossage ("invalid operand for code '%c'", code
);
21074 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
21075 (get_arm_condition_code (x
))],
21085 /* Former Maverick support, removed after GCC-4.7. */
21086 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
21091 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
21092 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
21093 /* Bad value for wCG register number. */
21095 output_operand_lossage ("invalid operand for code '%c'", code
);
21100 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
21103 /* Print an iWMMXt control register name. */
21105 if (!CONST_INT_P (x
)
21107 || INTVAL (x
) >= 16)
21108 /* Bad value for wC register number. */
21110 output_operand_lossage ("invalid operand for code '%c'", code
);
21116 static const char * wc_reg_names
[16] =
21118 "wCID", "wCon", "wCSSF", "wCASF",
21119 "wC4", "wC5", "wC6", "wC7",
21120 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21121 "wC12", "wC13", "wC14", "wC15"
21124 fputs (wc_reg_names
[INTVAL (x
)], stream
);
21128 /* Print the high single-precision register of a VFP double-precision
21132 machine_mode mode
= GET_MODE (x
);
21135 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
21137 output_operand_lossage ("invalid operand for code '%c'", code
);
21142 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
21144 output_operand_lossage ("invalid operand for code '%c'", code
);
21148 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
21152 /* Print a VFP/Neon double precision or quad precision register name. */
21156 machine_mode mode
= GET_MODE (x
);
21157 int is_quad
= (code
== 'q');
21160 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
21162 output_operand_lossage ("invalid operand for code '%c'", code
);
21167 || !IS_VFP_REGNUM (REGNO (x
)))
21169 output_operand_lossage ("invalid operand for code '%c'", code
);
21174 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
21175 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
21177 output_operand_lossage ("invalid operand for code '%c'", code
);
21181 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
21182 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
21186 /* These two codes print the low/high doubleword register of a Neon quad
21187 register, respectively. For pair-structure types, can also print
21188 low/high quadword registers. */
21192 machine_mode mode
= GET_MODE (x
);
21195 if ((GET_MODE_SIZE (mode
) != 16
21196 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
21198 output_operand_lossage ("invalid operand for code '%c'", code
);
21203 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
21205 output_operand_lossage ("invalid operand for code '%c'", code
);
21209 if (GET_MODE_SIZE (mode
) == 16)
21210 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
21211 + (code
== 'f' ? 1 : 0));
21213 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
21214 + (code
== 'f' ? 1 : 0));
21218 /* Print a VFPv3 floating-point constant, represented as an integer
21222 int index
= vfp3_const_double_index (x
);
21223 gcc_assert (index
!= -1);
21224 fprintf (stream
, "%d", index
);
21228 /* Print bits representing opcode features for Neon.
21230 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21231 and polynomials as unsigned.
21233 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21235 Bit 2 is 1 for rounding functions, 0 otherwise. */
21237 /* Identify the type as 's', 'u', 'p' or 'f'. */
21240 HOST_WIDE_INT bits
= INTVAL (x
);
21241 fputc ("uspf"[bits
& 3], stream
);
21245 /* Likewise, but signed and unsigned integers are both 'i'. */
21248 HOST_WIDE_INT bits
= INTVAL (x
);
21249 fputc ("iipf"[bits
& 3], stream
);
21253 /* As for 'T', but emit 'u' instead of 'p'. */
21256 HOST_WIDE_INT bits
= INTVAL (x
);
21257 fputc ("usuf"[bits
& 3], stream
);
21261 /* Bit 2: rounding (vs none). */
21264 HOST_WIDE_INT bits
= INTVAL (x
);
21265 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
21269 /* Memory operand for vld1/vst1 instruction. */
21273 bool postinc
= FALSE
;
21274 rtx postinc_reg
= NULL
;
21275 unsigned align
, memsize
, align_bits
;
21277 gcc_assert (MEM_P (x
));
21278 addr
= XEXP (x
, 0);
21279 if (GET_CODE (addr
) == POST_INC
)
21282 addr
= XEXP (addr
, 0);
21284 if (GET_CODE (addr
) == POST_MODIFY
)
21286 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
21287 addr
= XEXP (addr
, 0);
21289 asm_fprintf (stream
, "[%r", REGNO (addr
));
21291 /* We know the alignment of this access, so we can emit a hint in the
21292 instruction (for some alignments) as an aid to the memory subsystem
21294 align
= MEM_ALIGN (x
) >> 3;
21295 memsize
= MEM_SIZE (x
);
21297 /* Only certain alignment specifiers are supported by the hardware. */
21298 if (memsize
== 32 && (align
% 32) == 0)
21300 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
21302 else if (memsize
>= 8 && (align
% 8) == 0)
21307 if (align_bits
!= 0)
21308 asm_fprintf (stream
, ":%d", align_bits
);
21310 asm_fprintf (stream
, "]");
21313 fputs("!", stream
);
21315 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
21323 gcc_assert (MEM_P (x
));
21324 addr
= XEXP (x
, 0);
21325 gcc_assert (REG_P (addr
));
21326 asm_fprintf (stream
, "[%r]", REGNO (addr
));
21330 /* Translate an S register number into a D register number and element index. */
21333 machine_mode mode
= GET_MODE (x
);
21336 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
21338 output_operand_lossage ("invalid operand for code '%c'", code
);
21343 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21345 output_operand_lossage ("invalid operand for code '%c'", code
);
21349 regno
= regno
- FIRST_VFP_REGNUM
;
21350 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
21355 gcc_assert (CONST_DOUBLE_P (x
));
21357 result
= vfp3_const_double_for_fract_bits (x
);
21359 result
= vfp3_const_double_for_bits (x
);
21360 fprintf (stream
, "#%d", result
);
21363 /* Register specifier for vld1.16/vst1.16. Translate the S register
21364 number into a D register number and element index. */
21367 machine_mode mode
= GET_MODE (x
);
21370 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
21372 output_operand_lossage ("invalid operand for code '%c'", code
);
21377 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
21379 output_operand_lossage ("invalid operand for code '%c'", code
);
21383 regno
= regno
- FIRST_VFP_REGNUM
;
21384 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
21391 output_operand_lossage ("missing operand");
21395 switch (GET_CODE (x
))
21398 asm_fprintf (stream
, "%r", REGNO (x
));
21402 output_address (GET_MODE (x
), XEXP (x
, 0));
21408 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
21409 sizeof (fpstr
), 0, 1);
21410 fprintf (stream
, "#%s", fpstr
);
21415 gcc_assert (GET_CODE (x
) != NEG
);
21416 fputc ('#', stream
);
21417 if (GET_CODE (x
) == HIGH
)
21419 fputs (":lower16:", stream
);
21423 output_addr_const (stream
, x
);
21429 /* Target hook for printing a memory address. */
21431 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
21435 int is_minus
= GET_CODE (x
) == MINUS
;
21438 asm_fprintf (stream
, "[%r]", REGNO (x
));
21439 else if (GET_CODE (x
) == PLUS
|| is_minus
)
21441 rtx base
= XEXP (x
, 0);
21442 rtx index
= XEXP (x
, 1);
21443 HOST_WIDE_INT offset
= 0;
21445 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
21447 /* Ensure that BASE is a register. */
21448 /* (one of them must be). */
21449 /* Also ensure the SP is not used as in index register. */
21450 std::swap (base
, index
);
21452 switch (GET_CODE (index
))
21455 offset
= INTVAL (index
);
21458 asm_fprintf (stream
, "[%r, #%wd]",
21459 REGNO (base
), offset
);
21463 asm_fprintf (stream
, "[%r, %s%r]",
21464 REGNO (base
), is_minus
? "-" : "",
21474 asm_fprintf (stream
, "[%r, %s%r",
21475 REGNO (base
), is_minus
? "-" : "",
21476 REGNO (XEXP (index
, 0)));
21477 arm_print_operand (stream
, index
, 'S');
21478 fputs ("]", stream
);
21483 gcc_unreachable ();
21486 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
21487 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
21489 gcc_assert (REG_P (XEXP (x
, 0)));
21491 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
21492 asm_fprintf (stream
, "[%r, #%s%d]!",
21493 REGNO (XEXP (x
, 0)),
21494 GET_CODE (x
) == PRE_DEC
? "-" : "",
21495 GET_MODE_SIZE (mode
));
21497 asm_fprintf (stream
, "[%r], #%s%d",
21498 REGNO (XEXP (x
, 0)),
21499 GET_CODE (x
) == POST_DEC
? "-" : "",
21500 GET_MODE_SIZE (mode
));
21502 else if (GET_CODE (x
) == PRE_MODIFY
)
21504 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
21505 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21506 asm_fprintf (stream
, "#%wd]!",
21507 INTVAL (XEXP (XEXP (x
, 1), 1)));
21509 asm_fprintf (stream
, "%r]!",
21510 REGNO (XEXP (XEXP (x
, 1), 1)));
21512 else if (GET_CODE (x
) == POST_MODIFY
)
21514 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
21515 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
21516 asm_fprintf (stream
, "#%wd",
21517 INTVAL (XEXP (XEXP (x
, 1), 1)));
21519 asm_fprintf (stream
, "%r",
21520 REGNO (XEXP (XEXP (x
, 1), 1)));
21522 else output_addr_const (stream
, x
);
21527 asm_fprintf (stream
, "[%r]", REGNO (x
));
21528 else if (GET_CODE (x
) == POST_INC
)
21529 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
21530 else if (GET_CODE (x
) == PLUS
)
21532 gcc_assert (REG_P (XEXP (x
, 0)));
21533 if (CONST_INT_P (XEXP (x
, 1)))
21534 asm_fprintf (stream
, "[%r, #%wd]",
21535 REGNO (XEXP (x
, 0)),
21536 INTVAL (XEXP (x
, 1)));
21538 asm_fprintf (stream
, "[%r, %r]",
21539 REGNO (XEXP (x
, 0)),
21540 REGNO (XEXP (x
, 1)));
21543 output_addr_const (stream
, x
);
21547 /* Target hook for indicating whether a punctuation character for
21548 TARGET_PRINT_OPERAND is valid. */
21550 arm_print_operand_punct_valid_p (unsigned char code
)
21552 return (code
== '@' || code
== '|' || code
== '.'
21553 || code
== '(' || code
== ')' || code
== '#'
21554 || (TARGET_32BIT
&& (code
== '?'))
21555 || (TARGET_THUMB2
&& (code
== '!'))
21556 || (TARGET_THUMB
&& (code
== '_')));
21559 /* Target hook for assembling integer objects. The ARM version needs to
21560 handle word-sized values specially. */
21562 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
21566 if (size
== UNITS_PER_WORD
&& aligned_p
)
21568 fputs ("\t.word\t", asm_out_file
);
21569 output_addr_const (asm_out_file
, x
);
21571 /* Mark symbols as position independent. We only do this in the
21572 .text segment, not in the .data segment. */
21573 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
21574 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
21576 /* See legitimize_pic_address for an explanation of the
21577 TARGET_VXWORKS_RTP check. */
21578 if (!arm_pic_data_is_text_relative
21579 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
21580 fputs ("(GOT)", asm_out_file
);
21582 fputs ("(GOTOFF)", asm_out_file
);
21584 fputc ('\n', asm_out_file
);
21588 mode
= GET_MODE (x
);
21590 if (arm_vector_mode_supported_p (mode
))
21594 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
21596 units
= CONST_VECTOR_NUNITS (x
);
21597 size
= GET_MODE_UNIT_SIZE (mode
);
21599 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
21600 for (i
= 0; i
< units
; i
++)
21602 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21604 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
21607 for (i
= 0; i
< units
; i
++)
21609 rtx elt
= CONST_VECTOR_ELT (x
, i
);
21611 (*CONST_DOUBLE_REAL_VALUE (elt
), GET_MODE_INNER (mode
),
21612 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
21618 return default_assemble_integer (x
, size
, aligned_p
);
21622 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
21626 if (!TARGET_AAPCS_BASED
)
21629 default_named_section_asm_out_constructor
21630 : default_named_section_asm_out_destructor
) (symbol
, priority
);
21634 /* Put these in the .init_array section, using a special relocation. */
21635 if (priority
!= DEFAULT_INIT_PRIORITY
)
21638 sprintf (buf
, "%s.%.5u",
21639 is_ctor
? ".init_array" : ".fini_array",
21641 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
21648 switch_to_section (s
);
21649 assemble_align (POINTER_SIZE
);
21650 fputs ("\t.word\t", asm_out_file
);
21651 output_addr_const (asm_out_file
, symbol
);
21652 fputs ("(target1)\n", asm_out_file
);
21655 /* Add a function to the list of static constructors. */
21658 arm_elf_asm_constructor (rtx symbol
, int priority
)
21660 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
21663 /* Add a function to the list of static destructors. */
21666 arm_elf_asm_destructor (rtx symbol
, int priority
)
21668 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
21709 /* Returns the index of the ARM condition code string in
21710 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21711 COMPARISON should be an rtx like `(eq (...) (...))'. */
21714 maybe_get_arm_condition_code (rtx comparison
)
21716 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
21717 enum arm_cond_code code
;
21718 enum rtx_code comp_code
= GET_CODE (comparison
);
21720 if (GET_MODE_CLASS (mode
) != MODE_CC
)
21721 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
21722 XEXP (comparison
, 1));
21726 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
21727 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
21728 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
21729 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
21730 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
21731 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
21732 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
21733 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
21734 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
21735 case CC_DLTUmode
: code
= ARM_CC
;
21738 if (comp_code
== EQ
)
21739 return ARM_INVERSE_CONDITION_CODE (code
);
21740 if (comp_code
== NE
)
21747 case NE
: return ARM_NE
;
21748 case EQ
: return ARM_EQ
;
21749 case GE
: return ARM_PL
;
21750 case LT
: return ARM_MI
;
21751 default: return ARM_NV
;
21757 case NE
: return ARM_NE
;
21758 case EQ
: return ARM_EQ
;
21759 default: return ARM_NV
;
21765 case NE
: return ARM_MI
;
21766 case EQ
: return ARM_PL
;
21767 default: return ARM_NV
;
21772 /* We can handle all cases except UNEQ and LTGT. */
21775 case GE
: return ARM_GE
;
21776 case GT
: return ARM_GT
;
21777 case LE
: return ARM_LS
;
21778 case LT
: return ARM_MI
;
21779 case NE
: return ARM_NE
;
21780 case EQ
: return ARM_EQ
;
21781 case ORDERED
: return ARM_VC
;
21782 case UNORDERED
: return ARM_VS
;
21783 case UNLT
: return ARM_LT
;
21784 case UNLE
: return ARM_LE
;
21785 case UNGT
: return ARM_HI
;
21786 case UNGE
: return ARM_PL
;
21787 /* UNEQ and LTGT do not have a representation. */
21788 case UNEQ
: /* Fall through. */
21789 case LTGT
: /* Fall through. */
21790 default: return ARM_NV
;
21796 case NE
: return ARM_NE
;
21797 case EQ
: return ARM_EQ
;
21798 case GE
: return ARM_LE
;
21799 case GT
: return ARM_LT
;
21800 case LE
: return ARM_GE
;
21801 case LT
: return ARM_GT
;
21802 case GEU
: return ARM_LS
;
21803 case GTU
: return ARM_CC
;
21804 case LEU
: return ARM_CS
;
21805 case LTU
: return ARM_HI
;
21806 default: return ARM_NV
;
21812 case LTU
: return ARM_CS
;
21813 case GEU
: return ARM_CC
;
21814 case NE
: return ARM_CS
;
21815 case EQ
: return ARM_CC
;
21816 default: return ARM_NV
;
21822 case NE
: return ARM_NE
;
21823 case EQ
: return ARM_EQ
;
21824 case GEU
: return ARM_CS
;
21825 case GTU
: return ARM_HI
;
21826 case LEU
: return ARM_LS
;
21827 case LTU
: return ARM_CC
;
21828 default: return ARM_NV
;
21834 case GE
: return ARM_GE
;
21835 case LT
: return ARM_LT
;
21836 case GEU
: return ARM_CS
;
21837 case LTU
: return ARM_CC
;
21838 default: return ARM_NV
;
21844 case NE
: return ARM_VS
;
21845 case EQ
: return ARM_VC
;
21846 default: return ARM_NV
;
21852 case NE
: return ARM_NE
;
21853 case EQ
: return ARM_EQ
;
21854 case GE
: return ARM_GE
;
21855 case GT
: return ARM_GT
;
21856 case LE
: return ARM_LE
;
21857 case LT
: return ARM_LT
;
21858 case GEU
: return ARM_CS
;
21859 case GTU
: return ARM_HI
;
21860 case LEU
: return ARM_LS
;
21861 case LTU
: return ARM_CC
;
21862 default: return ARM_NV
;
21865 default: gcc_unreachable ();
21869 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
21870 static enum arm_cond_code
21871 get_arm_condition_code (rtx comparison
)
21873 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
21874 gcc_assert (code
!= ARM_NV
);
21878 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
21881 thumb2_final_prescan_insn (rtx_insn
*insn
)
21883 rtx_insn
*first_insn
= insn
;
21884 rtx body
= PATTERN (insn
);
21886 enum arm_cond_code code
;
21891 /* max_insns_skipped in the tune was already taken into account in the
21892 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
21893 just emit the IT blocks as we can. It does not make sense to split
21895 max
= MAX_INSN_PER_IT_BLOCK
;
21897 /* Remove the previous insn from the count of insns to be output. */
21898 if (arm_condexec_count
)
21899 arm_condexec_count
--;
21901 /* Nothing to do if we are already inside a conditional block. */
21902 if (arm_condexec_count
)
21905 if (GET_CODE (body
) != COND_EXEC
)
21908 /* Conditional jumps are implemented directly. */
21912 predicate
= COND_EXEC_TEST (body
);
21913 arm_current_cc
= get_arm_condition_code (predicate
);
21915 n
= get_attr_ce_count (insn
);
21916 arm_condexec_count
= 1;
21917 arm_condexec_mask
= (1 << n
) - 1;
21918 arm_condexec_masklen
= n
;
21919 /* See if subsequent instructions can be combined into the same block. */
21922 insn
= next_nonnote_insn (insn
);
21924 /* Jumping into the middle of an IT block is illegal, so a label or
21925 barrier terminates the block. */
21926 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
21929 body
= PATTERN (insn
);
21930 /* USE and CLOBBER aren't really insns, so just skip them. */
21931 if (GET_CODE (body
) == USE
21932 || GET_CODE (body
) == CLOBBER
)
21935 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
21936 if (GET_CODE (body
) != COND_EXEC
)
21938 /* Maximum number of conditionally executed instructions in a block. */
21939 n
= get_attr_ce_count (insn
);
21940 if (arm_condexec_masklen
+ n
> max
)
21943 predicate
= COND_EXEC_TEST (body
);
21944 code
= get_arm_condition_code (predicate
);
21945 mask
= (1 << n
) - 1;
21946 if (arm_current_cc
== code
)
21947 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
21948 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
21951 arm_condexec_count
++;
21952 arm_condexec_masklen
+= n
;
21954 /* A jump must be the last instruction in a conditional block. */
21958 /* Restore recog_data (getting the attributes of other insns can
21959 destroy this array, but final.c assumes that it remains intact
21960 across this call). */
21961 extract_constrain_insn_cached (first_insn
);
21965 arm_final_prescan_insn (rtx_insn
*insn
)
21967 /* BODY will hold the body of INSN. */
21968 rtx body
= PATTERN (insn
);
21970 /* This will be 1 if trying to repeat the trick, and things need to be
21971 reversed if it appears to fail. */
21974 /* If we start with a return insn, we only succeed if we find another one. */
21975 int seeking_return
= 0;
21976 enum rtx_code return_code
= UNKNOWN
;
21978 /* START_INSN will hold the insn from where we start looking. This is the
21979 first insn after the following code_label if REVERSE is true. */
21980 rtx_insn
*start_insn
= insn
;
21982 /* If in state 4, check if the target branch is reached, in order to
21983 change back to state 0. */
21984 if (arm_ccfsm_state
== 4)
21986 if (insn
== arm_target_insn
)
21988 arm_target_insn
= NULL
;
21989 arm_ccfsm_state
= 0;
21994 /* If in state 3, it is possible to repeat the trick, if this insn is an
21995 unconditional branch to a label, and immediately following this branch
21996 is the previous target label which is only used once, and the label this
21997 branch jumps to is not too far off. */
21998 if (arm_ccfsm_state
== 3)
22000 if (simplejump_p (insn
))
22002 start_insn
= next_nonnote_insn (start_insn
);
22003 if (BARRIER_P (start_insn
))
22005 /* XXX Isn't this always a barrier? */
22006 start_insn
= next_nonnote_insn (start_insn
);
22008 if (LABEL_P (start_insn
)
22009 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22010 && LABEL_NUSES (start_insn
) == 1)
22015 else if (ANY_RETURN_P (body
))
22017 start_insn
= next_nonnote_insn (start_insn
);
22018 if (BARRIER_P (start_insn
))
22019 start_insn
= next_nonnote_insn (start_insn
);
22020 if (LABEL_P (start_insn
)
22021 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
22022 && LABEL_NUSES (start_insn
) == 1)
22025 seeking_return
= 1;
22026 return_code
= GET_CODE (body
);
22035 gcc_assert (!arm_ccfsm_state
|| reverse
);
22036 if (!JUMP_P (insn
))
22039 /* This jump might be paralleled with a clobber of the condition codes
22040 the jump should always come first */
22041 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
22042 body
= XVECEXP (body
, 0, 0);
22045 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
22046 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
22049 int fail
= FALSE
, succeed
= FALSE
;
22050 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22051 int then_not_else
= TRUE
;
22052 rtx_insn
*this_insn
= start_insn
;
22055 /* Register the insn jumped to. */
22058 if (!seeking_return
)
22059 label
= XEXP (SET_SRC (body
), 0);
22061 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
22062 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
22063 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
22065 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
22066 then_not_else
= FALSE
;
22068 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
22070 seeking_return
= 1;
22071 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
22073 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
22075 seeking_return
= 1;
22076 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
22077 then_not_else
= FALSE
;
22080 gcc_unreachable ();
22082 /* See how many insns this branch skips, and what kind of insns. If all
22083 insns are okay, and the label or unconditional branch to the same
22084 label is not too far away, succeed. */
22085 for (insns_skipped
= 0;
22086 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
22090 this_insn
= next_nonnote_insn (this_insn
);
22094 switch (GET_CODE (this_insn
))
22097 /* Succeed if it is the target label, otherwise fail since
22098 control falls in from somewhere else. */
22099 if (this_insn
== label
)
22101 arm_ccfsm_state
= 1;
22109 /* Succeed if the following insn is the target label.
22111 If return insns are used then the last insn in a function
22112 will be a barrier. */
22113 this_insn
= next_nonnote_insn (this_insn
);
22114 if (this_insn
&& this_insn
== label
)
22116 arm_ccfsm_state
= 1;
22124 /* The AAPCS says that conditional calls should not be
22125 used since they make interworking inefficient (the
22126 linker can't transform BL<cond> into BLX). That's
22127 only a problem if the machine has BLX. */
22134 /* Succeed if the following insn is the target label, or
22135 if the following two insns are a barrier and the
22137 this_insn
= next_nonnote_insn (this_insn
);
22138 if (this_insn
&& BARRIER_P (this_insn
))
22139 this_insn
= next_nonnote_insn (this_insn
);
22141 if (this_insn
&& this_insn
== label
22142 && insns_skipped
< max_insns_skipped
)
22144 arm_ccfsm_state
= 1;
22152 /* If this is an unconditional branch to the same label, succeed.
22153 If it is to another label, do nothing. If it is conditional,
22155 /* XXX Probably, the tests for SET and the PC are
22158 scanbody
= PATTERN (this_insn
);
22159 if (GET_CODE (scanbody
) == SET
22160 && GET_CODE (SET_DEST (scanbody
)) == PC
)
22162 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
22163 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
22165 arm_ccfsm_state
= 2;
22168 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
22171 /* Fail if a conditional return is undesirable (e.g. on a
22172 StrongARM), but still allow this if optimizing for size. */
22173 else if (GET_CODE (scanbody
) == return_code
22174 && !use_return_insn (TRUE
, NULL
)
22177 else if (GET_CODE (scanbody
) == return_code
)
22179 arm_ccfsm_state
= 2;
22182 else if (GET_CODE (scanbody
) == PARALLEL
)
22184 switch (get_attr_conds (this_insn
))
22194 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
22199 /* Instructions using or affecting the condition codes make it
22201 scanbody
= PATTERN (this_insn
);
22202 if (!(GET_CODE (scanbody
) == SET
22203 || GET_CODE (scanbody
) == PARALLEL
)
22204 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
22214 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
22215 arm_target_label
= CODE_LABEL_NUMBER (label
);
22218 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
22220 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
22222 this_insn
= next_nonnote_insn (this_insn
);
22223 gcc_assert (!this_insn
22224 || (!BARRIER_P (this_insn
)
22225 && !LABEL_P (this_insn
)));
22229 /* Oh, dear! we ran off the end.. give up. */
22230 extract_constrain_insn_cached (insn
);
22231 arm_ccfsm_state
= 0;
22232 arm_target_insn
= NULL
;
22235 arm_target_insn
= this_insn
;
22238 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22241 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
22243 if (reverse
|| then_not_else
)
22244 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
22247 /* Restore recog_data (getting the attributes of other insns can
22248 destroy this array, but final.c assumes that it remains intact
22249 across this call. */
22250 extract_constrain_insn_cached (insn
);
22254 /* Output IT instructions. */
22256 thumb2_asm_output_opcode (FILE * stream
)
22261 if (arm_condexec_mask
)
22263 for (n
= 0; n
< arm_condexec_masklen
; n
++)
22264 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
22266 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
22267 arm_condition_codes
[arm_current_cc
]);
22268 arm_condexec_mask
= 0;
22272 /* Returns true if REGNO is a valid register
22273 for holding a quantity of type MODE. */
22275 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
22277 if (GET_MODE_CLASS (mode
) == MODE_CC
)
22278 return (regno
== CC_REGNUM
22279 || (TARGET_HARD_FLOAT
22280 && regno
== VFPCC_REGNUM
));
22282 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
22286 /* For the Thumb we only allow values bigger than SImode in
22287 registers 0 - 6, so that there is always a second low
22288 register available to hold the upper part of the value.
22289 We probably we ought to ensure that the register is the
22290 start of an even numbered register pair. */
22291 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
22293 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
22295 if (mode
== SFmode
|| mode
== SImode
)
22296 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22298 if (mode
== DFmode
)
22299 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
22301 if (mode
== HFmode
)
22302 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22304 /* VFP registers can hold HImode values. */
22305 if (mode
== HImode
)
22306 return VFP_REGNO_OK_FOR_SINGLE (regno
);
22309 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
22310 || (VALID_NEON_QREG_MODE (mode
)
22311 && NEON_REGNO_OK_FOR_QUAD (regno
))
22312 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
22313 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
22314 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
22315 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
22316 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
22321 if (TARGET_REALLY_IWMMXT
)
22323 if (IS_IWMMXT_GR_REGNUM (regno
))
22324 return mode
== SImode
;
22326 if (IS_IWMMXT_REGNUM (regno
))
22327 return VALID_IWMMXT_REG_MODE (mode
);
22330 /* We allow almost any value to be stored in the general registers.
22331 Restrict doubleword quantities to even register pairs in ARM state
22332 so that we can use ldrd. Do not allow very large Neon structure
22333 opaque modes in general registers; they would use too many. */
22334 if (regno
<= LAST_ARM_REGNUM
)
22336 if (ARM_NUM_REGS (mode
) > 4)
22342 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
22345 if (regno
== FRAME_POINTER_REGNUM
22346 || regno
== ARG_POINTER_REGNUM
)
22347 /* We only allow integers in the fake hard registers. */
22348 return GET_MODE_CLASS (mode
) == MODE_INT
;
22353 /* Implement MODES_TIEABLE_P. */
22356 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
22358 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
22361 /* We specifically want to allow elements of "structure" modes to
22362 be tieable to the structure. This more general condition allows
22363 other rarer situations too. */
22365 && (VALID_NEON_DREG_MODE (mode1
)
22366 || VALID_NEON_QREG_MODE (mode1
)
22367 || VALID_NEON_STRUCT_MODE (mode1
))
22368 && (VALID_NEON_DREG_MODE (mode2
)
22369 || VALID_NEON_QREG_MODE (mode2
)
22370 || VALID_NEON_STRUCT_MODE (mode2
)))
22376 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22377 not used in arm mode. */
22380 arm_regno_class (int regno
)
22382 if (regno
== PC_REGNUM
)
22387 if (regno
== STACK_POINTER_REGNUM
)
22389 if (regno
== CC_REGNUM
)
22396 if (TARGET_THUMB2
&& regno
< 8)
22399 if ( regno
<= LAST_ARM_REGNUM
22400 || regno
== FRAME_POINTER_REGNUM
22401 || regno
== ARG_POINTER_REGNUM
)
22402 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
22404 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
22405 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
22407 if (IS_VFP_REGNUM (regno
))
22409 if (regno
<= D7_VFP_REGNUM
)
22410 return VFP_D0_D7_REGS
;
22411 else if (regno
<= LAST_LO_VFP_REGNUM
)
22412 return VFP_LO_REGS
;
22414 return VFP_HI_REGS
;
22417 if (IS_IWMMXT_REGNUM (regno
))
22418 return IWMMXT_REGS
;
22420 if (IS_IWMMXT_GR_REGNUM (regno
))
22421 return IWMMXT_GR_REGS
;
22426 /* Handle a special case when computing the offset
22427 of an argument from the frame pointer. */
22429 arm_debugger_arg_offset (int value
, rtx addr
)
22433 /* We are only interested if dbxout_parms() failed to compute the offset. */
22437 /* We can only cope with the case where the address is held in a register. */
22441 /* If we are using the frame pointer to point at the argument, then
22442 an offset of 0 is correct. */
22443 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
22446 /* If we are using the stack pointer to point at the
22447 argument, then an offset of 0 is correct. */
22448 /* ??? Check this is consistent with thumb2 frame layout. */
22449 if ((TARGET_THUMB
|| !frame_pointer_needed
)
22450 && REGNO (addr
) == SP_REGNUM
)
22453 /* Oh dear. The argument is pointed to by a register rather
22454 than being held in a register, or being stored at a known
22455 offset from the frame pointer. Since GDB only understands
22456 those two kinds of argument we must translate the address
22457 held in the register into an offset from the frame pointer.
22458 We do this by searching through the insns for the function
22459 looking to see where this register gets its value. If the
22460 register is initialized from the frame pointer plus an offset
22461 then we are in luck and we can continue, otherwise we give up.
22463 This code is exercised by producing debugging information
22464 for a function with arguments like this:
22466 double func (double a, double b, int c, double d) {return d;}
22468 Without this code the stab for parameter 'd' will be set to
22469 an offset of 0 from the frame pointer, rather than 8. */
22471 /* The if() statement says:
22473 If the insn is a normal instruction
22474 and if the insn is setting the value in a register
22475 and if the register being set is the register holding the address of the argument
22476 and if the address is computing by an addition
22477 that involves adding to a register
22478 which is the frame pointer
22483 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
22485 if ( NONJUMP_INSN_P (insn
)
22486 && GET_CODE (PATTERN (insn
)) == SET
22487 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
22488 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
22489 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
22490 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22491 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
22494 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
22503 warning (0, "unable to compute real location of stacked parameter");
22504 value
= 8; /* XXX magic hack */
22510 /* Implement TARGET_PROMOTED_TYPE. */
22513 arm_promoted_type (const_tree t
)
22515 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
22516 return float_type_node
;
22520 /* Implement TARGET_CONVERT_TO_TYPE.
22521 Specifically, this hook implements the peculiarity of the ARM
22522 half-precision floating-point C semantics that requires conversions between
22523 __fp16 to or from double to do an intermediate conversion to float. */
22526 arm_convert_to_type (tree type
, tree expr
)
22528 tree fromtype
= TREE_TYPE (expr
);
22529 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
22531 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
22532 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
22533 return convert (type
, convert (float_type_node
, expr
));
22537 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
22538 This simply adds HFmode as a supported mode; even though we don't
22539 implement arithmetic on this type directly, it's supported by
22540 optabs conversions, much the way the double-word arithmetic is
22541 special-cased in the default hook. */
22544 arm_scalar_mode_supported_p (machine_mode mode
)
22546 if (mode
== HFmode
)
22547 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
22548 else if (ALL_FIXED_POINT_MODE_P (mode
))
22551 return default_scalar_mode_supported_p (mode
);
22554 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
22555 not to early-clobber SRC registers in the process.
22557 We assume that the operands described by SRC and DEST represent a
22558 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
22559 number of components into which the copy has been decomposed. */
22561 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
22565 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
22566 || REGNO (operands
[0]) < REGNO (operands
[1]))
22568 for (i
= 0; i
< count
; i
++)
22570 operands
[2 * i
] = dest
[i
];
22571 operands
[2 * i
+ 1] = src
[i
];
22576 for (i
= 0; i
< count
; i
++)
22578 operands
[2 * i
] = dest
[count
- i
- 1];
22579 operands
[2 * i
+ 1] = src
[count
- i
- 1];
22584 /* Split operands into moves from op[1] + op[2] into op[0]. */
22587 neon_split_vcombine (rtx operands
[3])
22589 unsigned int dest
= REGNO (operands
[0]);
22590 unsigned int src1
= REGNO (operands
[1]);
22591 unsigned int src2
= REGNO (operands
[2]);
22592 machine_mode halfmode
= GET_MODE (operands
[1]);
22593 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
22594 rtx destlo
, desthi
;
22596 if (src1
== dest
&& src2
== dest
+ halfregs
)
22598 /* No-op move. Can't split to nothing; emit something. */
22599 emit_note (NOTE_INSN_DELETED
);
22603 /* Preserve register attributes for variable tracking. */
22604 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
22605 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
22606 GET_MODE_SIZE (halfmode
));
22608 /* Special case of reversed high/low parts. Use VSWP. */
22609 if (src2
== dest
&& src1
== dest
+ halfregs
)
22611 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
22612 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
22613 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
22617 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
22619 /* Try to avoid unnecessary moves if part of the result
22620 is in the right place already. */
22622 emit_move_insn (destlo
, operands
[1]);
22623 if (src2
!= dest
+ halfregs
)
22624 emit_move_insn (desthi
, operands
[2]);
22628 if (src2
!= dest
+ halfregs
)
22629 emit_move_insn (desthi
, operands
[2]);
22631 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
22644 /* Like emit_multi_reg_push, but allowing for a different set of
22645 registers to be described as saved. MASK is the set of registers
22646 to be saved; REAL_REGS is the set of registers to be described as
22647 saved. If REAL_REGS is 0, only describe the stack adjustment. */
22650 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
22652 unsigned long regno
;
22653 rtx par
[10], tmp
, reg
;
22657 /* Build the parallel of the registers actually being stored. */
22658 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
22660 regno
= ctz_hwi (mask
);
22661 reg
= gen_rtx_REG (SImode
, regno
);
22664 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
22666 tmp
= gen_rtx_USE (VOIDmode
, reg
);
22671 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
22672 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
22673 tmp
= gen_frame_mem (BLKmode
, tmp
);
22674 tmp
= gen_rtx_SET (tmp
, par
[0]);
22677 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
22678 insn
= emit_insn (tmp
);
22680 /* Always build the stack adjustment note for unwind info. */
22681 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
22682 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
22685 /* Build the parallel of the registers recorded as saved for unwind. */
22686 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
22688 regno
= ctz_hwi (real_regs
);
22689 reg
= gen_rtx_REG (SImode
, regno
);
22691 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
22692 tmp
= gen_frame_mem (SImode
, tmp
);
22693 tmp
= gen_rtx_SET (tmp
, reg
);
22694 RTX_FRAME_RELATED_P (tmp
) = 1;
22702 RTX_FRAME_RELATED_P (par
[0]) = 1;
22703 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
22706 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
22711 /* Emit code to push or pop registers to or from the stack. F is the
22712 assembly file. MASK is the registers to pop. */
22714 thumb_pop (FILE *f
, unsigned long mask
)
22717 int lo_mask
= mask
& 0xFF;
22718 int pushed_words
= 0;
22722 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
22724 /* Special case. Do not generate a POP PC statement here, do it in
22726 thumb_exit (f
, -1);
22730 fprintf (f
, "\tpop\t{");
22732 /* Look at the low registers first. */
22733 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
22737 asm_fprintf (f
, "%r", regno
);
22739 if ((lo_mask
& ~1) != 0)
22746 if (mask
& (1 << PC_REGNUM
))
22748 /* Catch popping the PC. */
22749 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
22750 || crtl
->calls_eh_return
)
22752 /* The PC is never poped directly, instead
22753 it is popped into r3 and then BX is used. */
22754 fprintf (f
, "}\n");
22756 thumb_exit (f
, -1);
22765 asm_fprintf (f
, "%r", PC_REGNUM
);
22769 fprintf (f
, "}\n");
22772 /* Generate code to return from a thumb function.
22773 If 'reg_containing_return_addr' is -1, then the return address is
22774 actually on the stack, at the stack pointer. */
22776 thumb_exit (FILE *f
, int reg_containing_return_addr
)
22778 unsigned regs_available_for_popping
;
22779 unsigned regs_to_pop
;
22781 unsigned available
;
22785 int restore_a4
= FALSE
;
22787 /* Compute the registers we need to pop. */
22791 if (reg_containing_return_addr
== -1)
22793 regs_to_pop
|= 1 << LR_REGNUM
;
22797 if (TARGET_BACKTRACE
)
22799 /* Restore the (ARM) frame pointer and stack pointer. */
22800 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
22804 /* If there is nothing to pop then just emit the BX instruction and
22806 if (pops_needed
== 0)
22808 if (crtl
->calls_eh_return
)
22809 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
22811 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
22814 /* Otherwise if we are not supporting interworking and we have not created
22815 a backtrace structure and the function was not entered in ARM mode then
22816 just pop the return address straight into the PC. */
22817 else if (!TARGET_INTERWORK
22818 && !TARGET_BACKTRACE
22819 && !is_called_in_ARM_mode (current_function_decl
)
22820 && !crtl
->calls_eh_return
)
22822 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
22826 /* Find out how many of the (return) argument registers we can corrupt. */
22827 regs_available_for_popping
= 0;
22829 /* If returning via __builtin_eh_return, the bottom three registers
22830 all contain information needed for the return. */
22831 if (crtl
->calls_eh_return
)
22835 /* If we can deduce the registers used from the function's
22836 return value. This is more reliable that examining
22837 df_regs_ever_live_p () because that will be set if the register is
22838 ever used in the function, not just if the register is used
22839 to hold a return value. */
22841 if (crtl
->return_rtx
!= 0)
22842 mode
= GET_MODE (crtl
->return_rtx
);
22844 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
22846 size
= GET_MODE_SIZE (mode
);
22850 /* In a void function we can use any argument register.
22851 In a function that returns a structure on the stack
22852 we can use the second and third argument registers. */
22853 if (mode
== VOIDmode
)
22854 regs_available_for_popping
=
22855 (1 << ARG_REGISTER (1))
22856 | (1 << ARG_REGISTER (2))
22857 | (1 << ARG_REGISTER (3));
22859 regs_available_for_popping
=
22860 (1 << ARG_REGISTER (2))
22861 | (1 << ARG_REGISTER (3));
22863 else if (size
<= 4)
22864 regs_available_for_popping
=
22865 (1 << ARG_REGISTER (2))
22866 | (1 << ARG_REGISTER (3));
22867 else if (size
<= 8)
22868 regs_available_for_popping
=
22869 (1 << ARG_REGISTER (3));
22872 /* Match registers to be popped with registers into which we pop them. */
22873 for (available
= regs_available_for_popping
,
22874 required
= regs_to_pop
;
22875 required
!= 0 && available
!= 0;
22876 available
&= ~(available
& - available
),
22877 required
&= ~(required
& - required
))
22880 /* If we have any popping registers left over, remove them. */
22882 regs_available_for_popping
&= ~available
;
22884 /* Otherwise if we need another popping register we can use
22885 the fourth argument register. */
22886 else if (pops_needed
)
22888 /* If we have not found any free argument registers and
22889 reg a4 contains the return address, we must move it. */
22890 if (regs_available_for_popping
== 0
22891 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
22893 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
22894 reg_containing_return_addr
= LR_REGNUM
;
22896 else if (size
> 12)
22898 /* Register a4 is being used to hold part of the return value,
22899 but we have dire need of a free, low register. */
22902 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
22905 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
22907 /* The fourth argument register is available. */
22908 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
22914 /* Pop as many registers as we can. */
22915 thumb_pop (f
, regs_available_for_popping
);
22917 /* Process the registers we popped. */
22918 if (reg_containing_return_addr
== -1)
22920 /* The return address was popped into the lowest numbered register. */
22921 regs_to_pop
&= ~(1 << LR_REGNUM
);
22923 reg_containing_return_addr
=
22924 number_of_first_bit_set (regs_available_for_popping
);
22926 /* Remove this register for the mask of available registers, so that
22927 the return address will not be corrupted by further pops. */
22928 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
22931 /* If we popped other registers then handle them here. */
22932 if (regs_available_for_popping
)
22936 /* Work out which register currently contains the frame pointer. */
22937 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
22939 /* Move it into the correct place. */
22940 asm_fprintf (f
, "\tmov\t%r, %r\n",
22941 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
22943 /* (Temporarily) remove it from the mask of popped registers. */
22944 regs_available_for_popping
&= ~(1 << frame_pointer
);
22945 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
22947 if (regs_available_for_popping
)
22951 /* We popped the stack pointer as well,
22952 find the register that contains it. */
22953 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
22955 /* Move it into the stack register. */
22956 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
22958 /* At this point we have popped all necessary registers, so
22959 do not worry about restoring regs_available_for_popping
22960 to its correct value:
22962 assert (pops_needed == 0)
22963 assert (regs_available_for_popping == (1 << frame_pointer))
22964 assert (regs_to_pop == (1 << STACK_POINTER)) */
22968 /* Since we have just move the popped value into the frame
22969 pointer, the popping register is available for reuse, and
22970 we know that we still have the stack pointer left to pop. */
22971 regs_available_for_popping
|= (1 << frame_pointer
);
22975 /* If we still have registers left on the stack, but we no longer have
22976 any registers into which we can pop them, then we must move the return
22977 address into the link register and make available the register that
22979 if (regs_available_for_popping
== 0 && pops_needed
> 0)
22981 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
22983 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
22984 reg_containing_return_addr
);
22986 reg_containing_return_addr
= LR_REGNUM
;
22989 /* If we have registers left on the stack then pop some more.
22990 We know that at most we will want to pop FP and SP. */
22991 if (pops_needed
> 0)
22996 thumb_pop (f
, regs_available_for_popping
);
22998 /* We have popped either FP or SP.
22999 Move whichever one it is into the correct register. */
23000 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
23001 move_to
= number_of_first_bit_set (regs_to_pop
);
23003 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
23005 regs_to_pop
&= ~(1 << move_to
);
23010 /* If we still have not popped everything then we must have only
23011 had one register available to us and we are now popping the SP. */
23012 if (pops_needed
> 0)
23016 thumb_pop (f
, regs_available_for_popping
);
23018 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
23020 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
23022 assert (regs_to_pop == (1 << STACK_POINTER))
23023 assert (pops_needed == 1)
23027 /* If necessary restore the a4 register. */
23030 if (reg_containing_return_addr
!= LR_REGNUM
)
23032 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23033 reg_containing_return_addr
= LR_REGNUM
;
23036 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
23039 if (crtl
->calls_eh_return
)
23040 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23042 /* Return to caller. */
23043 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23046 /* Scan INSN just before assembler is output for it.
23047 For Thumb-1, we track the status of the condition codes; this
23048 information is used in the cbranchsi4_insn pattern. */
23050 thumb1_final_prescan_insn (rtx_insn
*insn
)
23052 if (flag_print_asm_name
)
23053 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
23054 INSN_ADDRESSES (INSN_UID (insn
)));
23055 /* Don't overwrite the previous setter when we get to a cbranch. */
23056 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
23058 enum attr_conds conds
;
23060 if (cfun
->machine
->thumb1_cc_insn
)
23062 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
23063 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
23066 conds
= get_attr_conds (insn
);
23067 if (conds
== CONDS_SET
)
23069 rtx set
= single_set (insn
);
23070 cfun
->machine
->thumb1_cc_insn
= insn
;
23071 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
23072 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
23073 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
23074 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
23076 rtx src1
= XEXP (SET_SRC (set
), 1);
23077 if (src1
== const0_rtx
)
23078 cfun
->machine
->thumb1_cc_mode
= CCmode
;
23080 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
23082 /* Record the src register operand instead of dest because
23083 cprop_hardreg pass propagates src. */
23084 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
23087 else if (conds
!= CONDS_NOCOND
)
23088 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
23091 /* Check if unexpected far jump is used. */
23092 if (cfun
->machine
->lr_save_eliminated
23093 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
23094 internal_error("Unexpected thumb1 far jump");
23098 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
23100 unsigned HOST_WIDE_INT mask
= 0xff;
23103 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
23104 if (val
== 0) /* XXX */
23107 for (i
= 0; i
< 25; i
++)
23108 if ((val
& (mask
<< i
)) == val
)
23114 /* Returns nonzero if the current function contains,
23115 or might contain a far jump. */
23117 thumb_far_jump_used_p (void)
23120 bool far_jump
= false;
23121 unsigned int func_size
= 0;
23123 /* This test is only important for leaf functions. */
23124 /* assert (!leaf_function_p ()); */
23126 /* If we have already decided that far jumps may be used,
23127 do not bother checking again, and always return true even if
23128 it turns out that they are not being used. Once we have made
23129 the decision that far jumps are present (and that hence the link
23130 register will be pushed onto the stack) we cannot go back on it. */
23131 if (cfun
->machine
->far_jump_used
)
23134 /* If this function is not being called from the prologue/epilogue
23135 generation code then it must be being called from the
23136 INITIAL_ELIMINATION_OFFSET macro. */
23137 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
23139 /* In this case we know that we are being asked about the elimination
23140 of the arg pointer register. If that register is not being used,
23141 then there are no arguments on the stack, and we do not have to
23142 worry that a far jump might force the prologue to push the link
23143 register, changing the stack offsets. In this case we can just
23144 return false, since the presence of far jumps in the function will
23145 not affect stack offsets.
23147 If the arg pointer is live (or if it was live, but has now been
23148 eliminated and so set to dead) then we do have to test to see if
23149 the function might contain a far jump. This test can lead to some
23150 false negatives, since before reload is completed, then length of
23151 branch instructions is not known, so gcc defaults to returning their
23152 longest length, which in turn sets the far jump attribute to true.
23154 A false negative will not result in bad code being generated, but it
23155 will result in a needless push and pop of the link register. We
23156 hope that this does not occur too often.
23158 If we need doubleword stack alignment this could affect the other
23159 elimination offsets so we can't risk getting it wrong. */
23160 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
23161 cfun
->machine
->arg_pointer_live
= 1;
23162 else if (!cfun
->machine
->arg_pointer_live
)
23166 /* We should not change far_jump_used during or after reload, as there is
23167 no chance to change stack frame layout. */
23168 if (reload_in_progress
|| reload_completed
)
23171 /* Check to see if the function contains a branch
23172 insn with the far jump attribute set. */
23173 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23175 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
23179 func_size
+= get_attr_length (insn
);
23182 /* Attribute far_jump will always be true for thumb1 before
23183 shorten_branch pass. So checking far_jump attribute before
23184 shorten_branch isn't much useful.
23186 Following heuristic tries to estimate more accurately if a far jump
23187 may finally be used. The heuristic is very conservative as there is
23188 no chance to roll-back the decision of not to use far jump.
23190 Thumb1 long branch offset is -2048 to 2046. The worst case is each
23191 2-byte insn is associated with a 4 byte constant pool. Using
23192 function size 2048/3 as the threshold is conservative enough. */
23195 if ((func_size
* 3) >= 2048)
23197 /* Record the fact that we have decided that
23198 the function does use far jumps. */
23199 cfun
->machine
->far_jump_used
= 1;
23207 /* Return nonzero if FUNC must be entered in ARM mode. */
23209 is_called_in_ARM_mode (tree func
)
23211 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
23213 /* Ignore the problem about functions whose address is taken. */
23214 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
23218 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
23224 /* Given the stack offsets and register mask in OFFSETS, decide how
23225 many additional registers to push instead of subtracting a constant
23226 from SP. For epilogues the principle is the same except we use pop.
23227 FOR_PROLOGUE indicates which we're generating. */
23229 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
23231 HOST_WIDE_INT amount
;
23232 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
23233 /* Extract a mask of the ones we can give to the Thumb's push/pop
23235 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
23236 /* Then count how many other high registers will need to be pushed. */
23237 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
23238 int n_free
, reg_base
, size
;
23240 if (!for_prologue
&& frame_pointer_needed
)
23241 amount
= offsets
->locals_base
- offsets
->saved_regs
;
23243 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
23245 /* If the stack frame size is 512 exactly, we can save one load
23246 instruction, which should make this a win even when optimizing
23248 if (!optimize_size
&& amount
!= 512)
23251 /* Can't do this if there are high registers to push. */
23252 if (high_regs_pushed
!= 0)
23255 /* Shouldn't do it in the prologue if no registers would normally
23256 be pushed at all. In the epilogue, also allow it if we'll have
23257 a pop insn for the PC. */
23260 || TARGET_BACKTRACE
23261 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
23262 || TARGET_INTERWORK
23263 || crtl
->args
.pretend_args_size
!= 0))
23266 /* Don't do this if thumb_expand_prologue wants to emit instructions
23267 between the push and the stack frame allocation. */
23269 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
23270 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
23277 size
= arm_size_return_regs ();
23278 reg_base
= ARM_NUM_INTS (size
);
23279 live_regs_mask
>>= reg_base
;
23282 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
23283 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
23285 live_regs_mask
>>= 1;
23291 gcc_assert (amount
/ 4 * 4 == amount
);
23293 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
23294 return (amount
- 508) / 4;
23295 if (amount
<= n_free
* 4)
23300 /* The bits which aren't usefully expanded as rtl. */
23302 thumb1_unexpanded_epilogue (void)
23304 arm_stack_offsets
*offsets
;
23306 unsigned long live_regs_mask
= 0;
23307 int high_regs_pushed
= 0;
23309 int had_to_push_lr
;
23312 if (cfun
->machine
->return_used_this_function
!= 0)
23315 if (IS_NAKED (arm_current_func_type ()))
23318 offsets
= arm_get_frame_offsets ();
23319 live_regs_mask
= offsets
->saved_regs_mask
;
23320 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
23322 /* If we can deduce the registers used from the function's return value.
23323 This is more reliable that examining df_regs_ever_live_p () because that
23324 will be set if the register is ever used in the function, not just if
23325 the register is used to hold a return value. */
23326 size
= arm_size_return_regs ();
23328 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
23331 unsigned long extra_mask
= (1 << extra_pop
) - 1;
23332 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
23335 /* The prolog may have pushed some high registers to use as
23336 work registers. e.g. the testsuite file:
23337 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
23338 compiles to produce:
23339 push {r4, r5, r6, r7, lr}
23343 as part of the prolog. We have to undo that pushing here. */
23345 if (high_regs_pushed
)
23347 unsigned long mask
= live_regs_mask
& 0xff;
23350 /* The available low registers depend on the size of the value we are
23358 /* Oh dear! We have no low registers into which we can pop
23361 ("no low registers available for popping high registers");
23363 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
23364 if (live_regs_mask
& (1 << next_hi_reg
))
23367 while (high_regs_pushed
)
23369 /* Find lo register(s) into which the high register(s) can
23371 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
23373 if (mask
& (1 << regno
))
23374 high_regs_pushed
--;
23375 if (high_regs_pushed
== 0)
23379 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
23381 /* Pop the values into the low register(s). */
23382 thumb_pop (asm_out_file
, mask
);
23384 /* Move the value(s) into the high registers. */
23385 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
23387 if (mask
& (1 << regno
))
23389 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
23392 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
23393 if (live_regs_mask
& (1 << next_hi_reg
))
23398 live_regs_mask
&= ~0x0f00;
23401 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
23402 live_regs_mask
&= 0xff;
23404 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
23406 /* Pop the return address into the PC. */
23407 if (had_to_push_lr
)
23408 live_regs_mask
|= 1 << PC_REGNUM
;
23410 /* Either no argument registers were pushed or a backtrace
23411 structure was created which includes an adjusted stack
23412 pointer, so just pop everything. */
23413 if (live_regs_mask
)
23414 thumb_pop (asm_out_file
, live_regs_mask
);
23416 /* We have either just popped the return address into the
23417 PC or it is was kept in LR for the entire function.
23418 Note that thumb_pop has already called thumb_exit if the
23419 PC was in the list. */
23420 if (!had_to_push_lr
)
23421 thumb_exit (asm_out_file
, LR_REGNUM
);
23425 /* Pop everything but the return address. */
23426 if (live_regs_mask
)
23427 thumb_pop (asm_out_file
, live_regs_mask
);
23429 if (had_to_push_lr
)
23433 /* We have no free low regs, so save one. */
23434 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
23438 /* Get the return address into a temporary register. */
23439 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
23443 /* Move the return address to lr. */
23444 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
23446 /* Restore the low register. */
23447 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
23452 regno
= LAST_ARG_REGNUM
;
23457 /* Remove the argument registers that were pushed onto the stack. */
23458 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
23459 SP_REGNUM
, SP_REGNUM
,
23460 crtl
->args
.pretend_args_size
);
23462 thumb_exit (asm_out_file
, regno
);
23468 /* Functions to save and restore machine-specific function data. */
23469 static struct machine_function
*
23470 arm_init_machine_status (void)
23472 struct machine_function
*machine
;
23473 machine
= ggc_cleared_alloc
<machine_function
> ();
23475 #if ARM_FT_UNKNOWN != 0
23476 machine
->func_type
= ARM_FT_UNKNOWN
;
23481 /* Return an RTX indicating where the return address to the
23482 calling function can be found. */
23484 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
23489 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
23492 /* Do anything needed before RTL is emitted for each function. */
23494 arm_init_expanders (void)
23496 /* Arrange to initialize and mark the machine per-function status. */
23497 init_machine_status
= arm_init_machine_status
;
23499 /* This is to stop the combine pass optimizing away the alignment
23500 adjustment of va_arg. */
23501 /* ??? It is claimed that this should not be necessary. */
23503 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
23506 /* Check that FUNC is called with a different mode. */
23509 arm_change_mode_p (tree func
)
23511 if (TREE_CODE (func
) != FUNCTION_DECL
)
23514 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
23517 callee_tree
= target_option_default_node
;
23519 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
23520 int flags
= callee_opts
->x_target_flags
;
23522 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
23525 /* Like arm_compute_initial_elimination offset. Simpler because there
23526 isn't an ABI specified frame pointer for Thumb. Instead, we set it
23527 to point at the base of the local variables after static stack
23528 space for a function has been allocated. */
23531 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
23533 arm_stack_offsets
*offsets
;
23535 offsets
= arm_get_frame_offsets ();
23539 case ARG_POINTER_REGNUM
:
23542 case STACK_POINTER_REGNUM
:
23543 return offsets
->outgoing_args
- offsets
->saved_args
;
23545 case FRAME_POINTER_REGNUM
:
23546 return offsets
->soft_frame
- offsets
->saved_args
;
23548 case ARM_HARD_FRAME_POINTER_REGNUM
:
23549 return offsets
->saved_regs
- offsets
->saved_args
;
23551 case THUMB_HARD_FRAME_POINTER_REGNUM
:
23552 return offsets
->locals_base
- offsets
->saved_args
;
23555 gcc_unreachable ();
23559 case FRAME_POINTER_REGNUM
:
23562 case STACK_POINTER_REGNUM
:
23563 return offsets
->outgoing_args
- offsets
->soft_frame
;
23565 case ARM_HARD_FRAME_POINTER_REGNUM
:
23566 return offsets
->saved_regs
- offsets
->soft_frame
;
23568 case THUMB_HARD_FRAME_POINTER_REGNUM
:
23569 return offsets
->locals_base
- offsets
->soft_frame
;
23572 gcc_unreachable ();
23577 gcc_unreachable ();
23581 /* Generate the function's prologue. */
23584 thumb1_expand_prologue (void)
23588 HOST_WIDE_INT amount
;
23589 HOST_WIDE_INT size
;
23590 arm_stack_offsets
*offsets
;
23591 unsigned long func_type
;
23593 unsigned long live_regs_mask
;
23594 unsigned long l_mask
;
23595 unsigned high_regs_pushed
= 0;
23597 func_type
= arm_current_func_type ();
23599 /* Naked functions don't have prologues. */
23600 if (IS_NAKED (func_type
))
23602 if (flag_stack_usage_info
)
23603 current_function_static_stack_size
= 0;
23607 if (IS_INTERRUPT (func_type
))
23609 error ("interrupt Service Routines cannot be coded in Thumb mode");
23613 if (is_called_in_ARM_mode (current_function_decl
))
23614 emit_insn (gen_prologue_thumb1_interwork ());
23616 offsets
= arm_get_frame_offsets ();
23617 live_regs_mask
= offsets
->saved_regs_mask
;
23619 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
23620 l_mask
= live_regs_mask
& 0x40ff;
23621 /* Then count how many other high registers will need to be pushed. */
23622 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
23624 if (crtl
->args
.pretend_args_size
)
23626 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
23628 if (cfun
->machine
->uses_anonymous_args
)
23630 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
23631 unsigned long mask
;
23633 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
23634 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
23636 insn
= thumb1_emit_multi_reg_push (mask
, 0);
23640 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
23641 stack_pointer_rtx
, x
));
23643 RTX_FRAME_RELATED_P (insn
) = 1;
23646 if (TARGET_BACKTRACE
)
23648 HOST_WIDE_INT offset
= 0;
23649 unsigned work_register
;
23650 rtx work_reg
, x
, arm_hfp_rtx
;
23652 /* We have been asked to create a stack backtrace structure.
23653 The code looks like this:
23657 0 sub SP, #16 Reserve space for 4 registers.
23658 2 push {R7} Push low registers.
23659 4 add R7, SP, #20 Get the stack pointer before the push.
23660 6 str R7, [SP, #8] Store the stack pointer
23661 (before reserving the space).
23662 8 mov R7, PC Get hold of the start of this code + 12.
23663 10 str R7, [SP, #16] Store it.
23664 12 mov R7, FP Get hold of the current frame pointer.
23665 14 str R7, [SP, #4] Store it.
23666 16 mov R7, LR Get hold of the current return address.
23667 18 str R7, [SP, #12] Store it.
23668 20 add R7, SP, #16 Point at the start of the
23669 backtrace structure.
23670 22 mov FP, R7 Put this value into the frame pointer. */
23672 work_register
= thumb_find_work_register (live_regs_mask
);
23673 work_reg
= gen_rtx_REG (SImode
, work_register
);
23674 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
23676 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
23677 stack_pointer_rtx
, GEN_INT (-16)));
23678 RTX_FRAME_RELATED_P (insn
) = 1;
23682 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
23683 RTX_FRAME_RELATED_P (insn
) = 1;
23685 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
23688 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
23689 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
23691 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
23692 x
= gen_frame_mem (SImode
, x
);
23693 emit_move_insn (x
, work_reg
);
23695 /* Make sure that the instruction fetching the PC is in the right place
23696 to calculate "start of backtrace creation code + 12". */
23697 /* ??? The stores using the common WORK_REG ought to be enough to
23698 prevent the scheduler from doing anything weird. Failing that
23699 we could always move all of the following into an UNSPEC_VOLATILE. */
23702 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
23703 emit_move_insn (work_reg
, x
);
23705 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
23706 x
= gen_frame_mem (SImode
, x
);
23707 emit_move_insn (x
, work_reg
);
23709 emit_move_insn (work_reg
, arm_hfp_rtx
);
23711 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
23712 x
= gen_frame_mem (SImode
, x
);
23713 emit_move_insn (x
, work_reg
);
23717 emit_move_insn (work_reg
, arm_hfp_rtx
);
23719 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
23720 x
= gen_frame_mem (SImode
, x
);
23721 emit_move_insn (x
, work_reg
);
23723 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
23724 emit_move_insn (work_reg
, x
);
23726 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
23727 x
= gen_frame_mem (SImode
, x
);
23728 emit_move_insn (x
, work_reg
);
23731 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
23732 emit_move_insn (work_reg
, x
);
23734 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
23735 x
= gen_frame_mem (SImode
, x
);
23736 emit_move_insn (x
, work_reg
);
23738 x
= GEN_INT (offset
+ 12);
23739 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
23741 emit_move_insn (arm_hfp_rtx
, work_reg
);
23743 /* Optimization: If we are not pushing any low registers but we are going
23744 to push some high registers then delay our first push. This will just
23745 be a push of LR and we can combine it with the push of the first high
23747 else if ((l_mask
& 0xff) != 0
23748 || (high_regs_pushed
== 0 && l_mask
))
23750 unsigned long mask
= l_mask
;
23751 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
23752 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
23753 RTX_FRAME_RELATED_P (insn
) = 1;
23756 if (high_regs_pushed
)
23758 unsigned pushable_regs
;
23759 unsigned next_hi_reg
;
23760 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
23761 : crtl
->args
.info
.nregs
;
23762 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
23764 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
23765 if (live_regs_mask
& (1 << next_hi_reg
))
23768 /* Here we need to mask out registers used for passing arguments
23769 even if they can be pushed. This is to avoid using them to stash the high
23770 registers. Such kind of stash may clobber the use of arguments. */
23771 pushable_regs
= l_mask
& (~arg_regs_mask
) & 0xff;
23773 if (pushable_regs
== 0)
23774 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
23776 while (high_regs_pushed
> 0)
23778 unsigned long real_regs_mask
= 0;
23780 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
23782 if (pushable_regs
& (1 << regno
))
23784 emit_move_insn (gen_rtx_REG (SImode
, regno
),
23785 gen_rtx_REG (SImode
, next_hi_reg
));
23787 high_regs_pushed
--;
23788 real_regs_mask
|= (1 << next_hi_reg
);
23790 if (high_regs_pushed
)
23792 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
23794 if (live_regs_mask
& (1 << next_hi_reg
))
23799 pushable_regs
&= ~((1 << regno
) - 1);
23805 /* If we had to find a work register and we have not yet
23806 saved the LR then add it to the list of regs to push. */
23807 if (l_mask
== (1 << LR_REGNUM
))
23809 pushable_regs
|= l_mask
;
23810 real_regs_mask
|= l_mask
;
23814 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
23815 RTX_FRAME_RELATED_P (insn
) = 1;
23819 /* Load the pic register before setting the frame pointer,
23820 so we can use r7 as a temporary work register. */
23821 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
23822 arm_load_pic_register (live_regs_mask
);
23824 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
23825 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
23826 stack_pointer_rtx
);
23828 size
= offsets
->outgoing_args
- offsets
->saved_args
;
23829 if (flag_stack_usage_info
)
23830 current_function_static_stack_size
= size
;
23832 /* If we have a frame, then do stack checking. FIXME: not implemented. */
23833 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
23834 sorry ("-fstack-check=specific for Thumb-1");
23836 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
23837 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
23842 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23843 GEN_INT (- amount
)));
23844 RTX_FRAME_RELATED_P (insn
) = 1;
23850 /* The stack decrement is too big for an immediate value in a single
23851 insn. In theory we could issue multiple subtracts, but after
23852 three of them it becomes more space efficient to place the full
23853 value in the constant pool and load into a register. (Also the
23854 ARM debugger really likes to see only one stack decrement per
23855 function). So instead we look for a scratch register into which
23856 we can load the decrement, and then we subtract this from the
23857 stack pointer. Unfortunately on the thumb the only available
23858 scratch registers are the argument registers, and we cannot use
23859 these as they may hold arguments to the function. Instead we
23860 attempt to locate a call preserved register which is used by this
23861 function. If we can find one, then we know that it will have
23862 been pushed at the start of the prologue and so we can corrupt
23864 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
23865 if (live_regs_mask
& (1 << regno
))
23868 gcc_assert(regno
<= LAST_LO_REGNUM
);
23870 reg
= gen_rtx_REG (SImode
, regno
);
23872 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
23874 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
23875 stack_pointer_rtx
, reg
));
23877 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23878 plus_constant (Pmode
, stack_pointer_rtx
,
23880 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23881 RTX_FRAME_RELATED_P (insn
) = 1;
23885 if (frame_pointer_needed
)
23886 thumb_set_frame_pointer (offsets
);
23888 /* If we are profiling, make sure no instructions are scheduled before
23889 the call to mcount. Similarly if the user has requested no
23890 scheduling in the prolog. Similarly if we want non-call exceptions
23891 using the EABI unwinder, to prevent faulting instructions from being
23892 swapped with a stack adjustment. */
23893 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
23894 || (arm_except_unwind_info (&global_options
) == UI_TARGET
23895 && cfun
->can_throw_non_call_exceptions
))
23896 emit_insn (gen_blockage ());
23898 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
23899 if (live_regs_mask
& 0xff)
23900 cfun
->machine
->lr_save_eliminated
= 0;
23903 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
23904 POP instruction can be generated. LR should be replaced by PC. All
23905 the checks required are already done by USE_RETURN_INSN (). Hence,
23906 all we really need to check here is if single register is to be
23907 returned, or multiple register return. */
23909 thumb2_expand_return (bool simple_return
)
23912 unsigned long saved_regs_mask
;
23913 arm_stack_offsets
*offsets
;
23915 offsets
= arm_get_frame_offsets ();
23916 saved_regs_mask
= offsets
->saved_regs_mask
;
23918 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
23919 if (saved_regs_mask
& (1 << i
))
23922 if (!simple_return
&& saved_regs_mask
)
23926 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
23927 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
23928 rtx addr
= gen_rtx_MEM (SImode
,
23929 gen_rtx_POST_INC (SImode
,
23930 stack_pointer_rtx
));
23931 set_mem_alias_set (addr
, get_frame_alias_set ());
23932 XVECEXP (par
, 0, 0) = ret_rtx
;
23933 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
23934 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
23935 emit_jump_insn (par
);
23939 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
23940 saved_regs_mask
|= (1 << PC_REGNUM
);
23941 arm_emit_multi_reg_pop (saved_regs_mask
);
23946 emit_jump_insn (simple_return_rtx
);
23951 thumb1_expand_epilogue (void)
23953 HOST_WIDE_INT amount
;
23954 arm_stack_offsets
*offsets
;
23957 /* Naked functions don't have prologues. */
23958 if (IS_NAKED (arm_current_func_type ()))
23961 offsets
= arm_get_frame_offsets ();
23962 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
23964 if (frame_pointer_needed
)
23966 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
23967 amount
= offsets
->locals_base
- offsets
->saved_regs
;
23969 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
23971 gcc_assert (amount
>= 0);
23974 emit_insn (gen_blockage ());
23977 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23978 GEN_INT (amount
)));
23981 /* r3 is always free in the epilogue. */
23982 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
23984 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
23985 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
23989 /* Emit a USE (stack_pointer_rtx), so that
23990 the stack adjustment will not be deleted. */
23991 emit_insn (gen_force_register_use (stack_pointer_rtx
));
23993 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
23994 emit_insn (gen_blockage ());
23996 /* Emit a clobber for each insn that will be restored in the epilogue,
23997 so that flow2 will get register lifetimes correct. */
23998 for (regno
= 0; regno
< 13; regno
++)
23999 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
24000 emit_clobber (gen_rtx_REG (SImode
, regno
));
24002 if (! df_regs_ever_live_p (LR_REGNUM
))
24003 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
24006 /* Epilogue code for APCS frame. */
24008 arm_expand_epilogue_apcs_frame (bool really_return
)
24010 unsigned long func_type
;
24011 unsigned long saved_regs_mask
;
24014 int floats_from_frame
= 0;
24015 arm_stack_offsets
*offsets
;
24017 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
24018 func_type
= arm_current_func_type ();
24020 /* Get frame offsets for ARM. */
24021 offsets
= arm_get_frame_offsets ();
24022 saved_regs_mask
= offsets
->saved_regs_mask
;
24024 /* Find the offset of the floating-point save area in the frame. */
24026 = (offsets
->saved_args
24027 + arm_compute_static_chain_stack_bytes ()
24030 /* Compute how many core registers saved and how far away the floats are. */
24031 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
24032 if (saved_regs_mask
& (1 << i
))
24035 floats_from_frame
+= 4;
24038 if (TARGET_HARD_FLOAT
)
24041 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
24043 /* The offset is from IP_REGNUM. */
24044 int saved_size
= arm_get_vfp_saved_size ();
24045 if (saved_size
> 0)
24048 floats_from_frame
+= saved_size
;
24049 insn
= emit_insn (gen_addsi3 (ip_rtx
,
24050 hard_frame_pointer_rtx
,
24051 GEN_INT (-floats_from_frame
)));
24052 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
24053 ip_rtx
, hard_frame_pointer_rtx
);
24056 /* Generate VFP register multi-pop. */
24057 start_reg
= FIRST_VFP_REGNUM
;
24059 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
24060 /* Look for a case where a reg does not need restoring. */
24061 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
24062 && (!df_regs_ever_live_p (i
+ 1)
24063 || call_used_regs
[i
+ 1]))
24065 if (start_reg
!= i
)
24066 arm_emit_vfp_multi_reg_pop (start_reg
,
24067 (i
- start_reg
) / 2,
24068 gen_rtx_REG (SImode
,
24073 /* Restore the remaining regs that we have discovered (or possibly
24074 even all of them, if the conditional in the for loop never
24076 if (start_reg
!= i
)
24077 arm_emit_vfp_multi_reg_pop (start_reg
,
24078 (i
- start_reg
) / 2,
24079 gen_rtx_REG (SImode
, IP_REGNUM
));
24084 /* The frame pointer is guaranteed to be non-double-word aligned, as
24085 it is set to double-word-aligned old_stack_pointer - 4. */
24087 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
24089 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
24090 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
24092 rtx addr
= gen_frame_mem (V2SImode
,
24093 plus_constant (Pmode
, hard_frame_pointer_rtx
,
24095 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
24096 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24097 gen_rtx_REG (V2SImode
, i
),
24103 /* saved_regs_mask should contain IP which contains old stack pointer
24104 at the time of activation creation. Since SP and IP are adjacent registers,
24105 we can restore the value directly into SP. */
24106 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
24107 saved_regs_mask
&= ~(1 << IP_REGNUM
);
24108 saved_regs_mask
|= (1 << SP_REGNUM
);
24110 /* There are two registers left in saved_regs_mask - LR and PC. We
24111 only need to restore LR (the return address), but to
24112 save time we can load it directly into PC, unless we need a
24113 special function exit sequence, or we are not really returning. */
24115 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
24116 && !crtl
->calls_eh_return
)
24117 /* Delete LR from the register mask, so that LR on
24118 the stack is loaded into the PC in the register mask. */
24119 saved_regs_mask
&= ~(1 << LR_REGNUM
);
24121 saved_regs_mask
&= ~(1 << PC_REGNUM
);
24123 num_regs
= bit_count (saved_regs_mask
);
24124 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
24127 emit_insn (gen_blockage ());
24128 /* Unwind the stack to just below the saved registers. */
24129 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24130 hard_frame_pointer_rtx
,
24131 GEN_INT (- 4 * num_regs
)));
24133 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
24134 stack_pointer_rtx
, hard_frame_pointer_rtx
);
24137 arm_emit_multi_reg_pop (saved_regs_mask
);
24139 if (IS_INTERRUPT (func_type
))
24141 /* Interrupt handlers will have pushed the
24142 IP onto the stack, so restore it now. */
24144 rtx addr
= gen_rtx_MEM (SImode
,
24145 gen_rtx_POST_INC (SImode
,
24146 stack_pointer_rtx
));
24147 set_mem_alias_set (addr
, get_frame_alias_set ());
24148 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
24149 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24150 gen_rtx_REG (SImode
, IP_REGNUM
),
24154 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
24157 if (crtl
->calls_eh_return
)
24158 emit_insn (gen_addsi3 (stack_pointer_rtx
,
24160 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
24162 if (IS_STACKALIGN (func_type
))
24163 /* Restore the original stack pointer. Before prologue, the stack was
24164 realigned and the original stack pointer saved in r0. For details,
24165 see comment in arm_expand_prologue. */
24166 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
24168 emit_jump_insn (simple_return_rtx
);
24171 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24172 function is not a sibcall. */
24174 arm_expand_epilogue (bool really_return
)
24176 unsigned long func_type
;
24177 unsigned long saved_regs_mask
;
24181 arm_stack_offsets
*offsets
;
24183 func_type
= arm_current_func_type ();
24185 /* Naked functions don't have epilogue. Hence, generate return pattern, and
24186 let output_return_instruction take care of instruction emission if any. */
24187 if (IS_NAKED (func_type
)
24188 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
24191 emit_jump_insn (simple_return_rtx
);
24195 /* If we are throwing an exception, then we really must be doing a
24196 return, so we can't tail-call. */
24197 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
24199 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
24201 arm_expand_epilogue_apcs_frame (really_return
);
24205 /* Get frame offsets for ARM. */
24206 offsets
= arm_get_frame_offsets ();
24207 saved_regs_mask
= offsets
->saved_regs_mask
;
24208 num_regs
= bit_count (saved_regs_mask
);
24210 if (frame_pointer_needed
)
24213 /* Restore stack pointer if necessary. */
24216 /* In ARM mode, frame pointer points to first saved register.
24217 Restore stack pointer to last saved register. */
24218 amount
= offsets
->frame
- offsets
->saved_regs
;
24220 /* Force out any pending memory operations that reference stacked data
24221 before stack de-allocation occurs. */
24222 emit_insn (gen_blockage ());
24223 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24224 hard_frame_pointer_rtx
,
24225 GEN_INT (amount
)));
24226 arm_add_cfa_adjust_cfa_note (insn
, amount
,
24228 hard_frame_pointer_rtx
);
24230 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24232 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24236 /* In Thumb-2 mode, the frame pointer points to the last saved
24238 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24241 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
24242 hard_frame_pointer_rtx
,
24243 GEN_INT (amount
)));
24244 arm_add_cfa_adjust_cfa_note (insn
, amount
,
24245 hard_frame_pointer_rtx
,
24246 hard_frame_pointer_rtx
);
24249 /* Force out any pending memory operations that reference stacked data
24250 before stack de-allocation occurs. */
24251 emit_insn (gen_blockage ());
24252 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
24253 hard_frame_pointer_rtx
));
24254 arm_add_cfa_adjust_cfa_note (insn
, 0,
24256 hard_frame_pointer_rtx
);
24257 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24259 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24264 /* Pop off outgoing args and local frame to adjust stack pointer to
24265 last saved register. */
24266 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24270 /* Force out any pending memory operations that reference stacked data
24271 before stack de-allocation occurs. */
24272 emit_insn (gen_blockage ());
24273 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24275 GEN_INT (amount
)));
24276 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
24277 stack_pointer_rtx
, stack_pointer_rtx
);
24278 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24280 emit_insn (gen_force_register_use (stack_pointer_rtx
));
24284 if (TARGET_HARD_FLOAT
)
24286 /* Generate VFP register multi-pop. */
24287 int end_reg
= LAST_VFP_REGNUM
+ 1;
24289 /* Scan the registers in reverse order. We need to match
24290 any groupings made in the prologue and generate matching
24291 vldm operations. The need to match groups is because,
24292 unlike pop, vldm can only do consecutive regs. */
24293 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
24294 /* Look for a case where a reg does not need restoring. */
24295 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
24296 && (!df_regs_ever_live_p (i
+ 1)
24297 || call_used_regs
[i
+ 1]))
24299 /* Restore the regs discovered so far (from reg+2 to
24301 if (end_reg
> i
+ 2)
24302 arm_emit_vfp_multi_reg_pop (i
+ 2,
24303 (end_reg
- (i
+ 2)) / 2,
24304 stack_pointer_rtx
);
24308 /* Restore the remaining regs that we have discovered (or possibly
24309 even all of them, if the conditional in the for loop never
24311 if (end_reg
> i
+ 2)
24312 arm_emit_vfp_multi_reg_pop (i
+ 2,
24313 (end_reg
- (i
+ 2)) / 2,
24314 stack_pointer_rtx
);
24318 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
24319 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
24322 rtx addr
= gen_rtx_MEM (V2SImode
,
24323 gen_rtx_POST_INC (SImode
,
24324 stack_pointer_rtx
));
24325 set_mem_alias_set (addr
, get_frame_alias_set ());
24326 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
24327 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24328 gen_rtx_REG (V2SImode
, i
),
24330 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
24331 stack_pointer_rtx
, stack_pointer_rtx
);
24334 if (saved_regs_mask
)
24337 bool return_in_pc
= false;
24339 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
24340 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
24341 && !IS_STACKALIGN (func_type
)
24343 && crtl
->args
.pretend_args_size
== 0
24344 && saved_regs_mask
& (1 << LR_REGNUM
)
24345 && !crtl
->calls_eh_return
)
24347 saved_regs_mask
&= ~(1 << LR_REGNUM
);
24348 saved_regs_mask
|= (1 << PC_REGNUM
);
24349 return_in_pc
= true;
24352 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
24354 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
24355 if (saved_regs_mask
& (1 << i
))
24357 rtx addr
= gen_rtx_MEM (SImode
,
24358 gen_rtx_POST_INC (SImode
,
24359 stack_pointer_rtx
));
24360 set_mem_alias_set (addr
, get_frame_alias_set ());
24362 if (i
== PC_REGNUM
)
24364 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
24365 XVECEXP (insn
, 0, 0) = ret_rtx
;
24366 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
24368 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
24369 insn
= emit_jump_insn (insn
);
24373 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
24375 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
24376 gen_rtx_REG (SImode
, i
),
24378 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
24380 stack_pointer_rtx
);
24387 && current_tune
->prefer_ldrd_strd
24388 && !optimize_function_for_size_p (cfun
))
24391 thumb2_emit_ldrd_pop (saved_regs_mask
);
24392 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
24393 arm_emit_ldrd_pop (saved_regs_mask
);
24395 arm_emit_multi_reg_pop (saved_regs_mask
);
24398 arm_emit_multi_reg_pop (saved_regs_mask
);
24406 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
24410 rtx dwarf
= NULL_RTX
;
24412 emit_insn (gen_addsi3 (stack_pointer_rtx
,
24414 GEN_INT (amount
)));
24416 RTX_FRAME_RELATED_P (tmp
) = 1;
24418 if (cfun
->machine
->uses_anonymous_args
)
24420 /* Restore pretend args. Refer arm_expand_prologue on how to save
24421 pretend_args in stack. */
24422 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
24423 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
24424 for (j
= 0, i
= 0; j
< num_regs
; i
++)
24425 if (saved_regs_mask
& (1 << i
))
24427 rtx reg
= gen_rtx_REG (SImode
, i
);
24428 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
24431 REG_NOTES (tmp
) = dwarf
;
24433 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
24434 stack_pointer_rtx
, stack_pointer_rtx
);
24437 if (!really_return
)
24440 if (crtl
->calls_eh_return
)
24441 emit_insn (gen_addsi3 (stack_pointer_rtx
,
24443 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
24445 if (IS_STACKALIGN (func_type
))
24446 /* Restore the original stack pointer. Before prologue, the stack was
24447 realigned and the original stack pointer saved in r0. For details,
24448 see comment in arm_expand_prologue. */
24449 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
24451 emit_jump_insn (simple_return_rtx
);
/* Emits the ARM->Thumb interworking stub: an ARM-mode "orr ip, pc, #1" +
   "bx ip" pair, then a ".real_start_of" Thumb label that gas uses to bypass
   the ARM entry when the caller is already Thumb.
   NOTE(review): extraction dropped original lines 24456-24462 and 24465-24467
   (return type, opening brace, the declaration of `name', the second half of
   the GET_CODE assert, and the return); this span is residue, not compilable
   code — recover the dropped lines from the pristine source before editing.  */
24454 /* Implementation of insn prologue_thumb1_interwork. This is the first
24455 "instruction" of a function called in ARM mode. Swap to thumb mode. */
24458 thumb1_output_interwork (void)
24461 FILE *f
= asm_out_file
;
/* Sanity: the current function must have a SYMBOL_REF we can name.  */
24463 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
24464 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
24466 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
24468 /* Generate code sequence to switch us into Thumb mode. */
24469 /* The .code 32 directive has already been emitted by
24470 ASM_DECLARE_FUNCTION_NAME. */
/* Set the Thumb bit in ip and branch-exchange to it: the canonical
   ARM-state entry sequence for a Thumb function.  */
24471 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
24472 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
24474 /* Generate a label, so that the debugger will notice the
24475 change in instruction sets. This label is also used by
24476 the assembler to bypass the ARM code when this function
24477 is called from a Thumb encoded function elsewhere in the
24478 same file. Hence the definition of STUB_NAME here must
24479 agree with the definition in gas/config/tc-arm.c. */
24481 #define STUB_NAME ".real_start_of"
24483 fprintf (f
, "\t.code\t16\n");
/* DLL-exported names carry an encoding prefix; strip it before emitting.  */
24485 if (arm_dllexport_name_p (name
))
24486 name
= arm_strip_name_encoding (name
);
24488 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
24489 fprintf (f
, "\t.thumb_func\n");
24490 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
24495 /* Handle the case of a double word load into a low register from
24496 a computed memory address. The computed address may involve a
24497 register which is overwritten by the load. */
24499 thumb_load_double_from_address (rtx
*operands
)
24507 gcc_assert (REG_P (operands
[0]));
24508 gcc_assert (MEM_P (operands
[1]));
24510 /* Get the memory address. */
24511 addr
= XEXP (operands
[1], 0);
24513 /* Work out how the memory address is computed. */
24514 switch (GET_CODE (addr
))
24517 operands
[2] = adjust_address (operands
[1], SImode
, 4);
24519 if (REGNO (operands
[0]) == REGNO (addr
))
24521 output_asm_insn ("ldr\t%H0, %2", operands
);
24522 output_asm_insn ("ldr\t%0, %1", operands
);
24526 output_asm_insn ("ldr\t%0, %1", operands
);
24527 output_asm_insn ("ldr\t%H0, %2", operands
);
24532 /* Compute <address> + 4 for the high order load. */
24533 operands
[2] = adjust_address (operands
[1], SImode
, 4);
24535 output_asm_insn ("ldr\t%0, %1", operands
);
24536 output_asm_insn ("ldr\t%H0, %2", operands
);
24540 arg1
= XEXP (addr
, 0);
24541 arg2
= XEXP (addr
, 1);
24543 if (CONSTANT_P (arg1
))
24544 base
= arg2
, offset
= arg1
;
24546 base
= arg1
, offset
= arg2
;
24548 gcc_assert (REG_P (base
));
24550 /* Catch the case of <address> = <reg> + <reg> */
24551 if (REG_P (offset
))
24553 int reg_offset
= REGNO (offset
);
24554 int reg_base
= REGNO (base
);
24555 int reg_dest
= REGNO (operands
[0]);
24557 /* Add the base and offset registers together into the
24558 higher destination register. */
24559 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
24560 reg_dest
+ 1, reg_base
, reg_offset
);
24562 /* Load the lower destination register from the address in
24563 the higher destination register. */
24564 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
24565 reg_dest
, reg_dest
+ 1);
24567 /* Load the higher destination register from its own address
24569 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
24570 reg_dest
+ 1, reg_dest
+ 1);
24574 /* Compute <address> + 4 for the high order load. */
24575 operands
[2] = adjust_address (operands
[1], SImode
, 4);
24577 /* If the computed address is held in the low order register
24578 then load the high order register first, otherwise always
24579 load the low order register first. */
24580 if (REGNO (operands
[0]) == REGNO (base
))
24582 output_asm_insn ("ldr\t%H0, %2", operands
);
24583 output_asm_insn ("ldr\t%0, %1", operands
);
24587 output_asm_insn ("ldr\t%0, %1", operands
);
24588 output_asm_insn ("ldr\t%H0, %2", operands
);
24594 /* With no registers to worry about we can just load the value
24596 operands
[2] = adjust_address (operands
[1], SImode
, 4);
24598 output_asm_insn ("ldr\t%H0, %2", operands
);
24599 output_asm_insn ("ldr\t%0, %1", operands
);
24603 gcc_unreachable ();
/* Emits a Thumb ldmia/stmia pair that copies N words using the scratch
   registers in operands[4..6]; ldmia/stmia require the register list in
   ascending order, hence the std::swap sorting (a 3-element bubble sort in
   the second arm).
   NOTE(review): extraction dropped original lines 24611-24614, 24617,
   24620-24622, 24629 and 24632-24634 — presumably the return type, the
   `switch (n)' with its `case 2:'/`case 3:' labels, braces, and the final
   return — so the two sorted-copy sequences below belong to the n==2 and
   n==3 cases respectively; confirm against the pristine source.  */
24610 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
/* (case n == 2) sort the two scratch regs into ascending order.  */
24615 if (REGNO (operands
[4]) > REGNO (operands
[5]))
24616 std::swap (operands
[4], operands
[5]);
24618 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
24619 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
/* (case n == 3) three compare-and-swaps = full sort of three regs.  */
24623 if (REGNO (operands
[4]) > REGNO (operands
[5]))
24624 std::swap (operands
[4], operands
[5]);
24625 if (REGNO (operands
[5]) > REGNO (operands
[6]))
24626 std::swap (operands
[5], operands
[6]);
24627 if (REGNO (operands
[4]) > REGNO (operands
[5]))
24628 std::swap (operands
[4], operands
[5]);
24630 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
24631 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
/* Any other N is a bug in the caller.  */
24635 gcc_unreachable ();
/* Emits "bl <label>" to a per-register call-via trampoline; the trampoline
   labels are created lazily, either once per compilation unit (plain text
   section) or once per function (function-sections), since cross-section
   reachability of a shared stub cannot be assumed.
   NOTE(review): extraction dropped original lines 24644, 24646-24647,
   24649, 24654, 24656 and 24660-24662 (return type, `labelp' declaration,
   braces and the `else' introducing the per-function branch); residue only.  */
24641 /* Output a call-via instruction for thumb state. */
24643 thumb_call_via_reg (rtx reg
)
24645 int regno
= REGNO (reg
);
/* Only r0..r12 may carry a call target here; lr/pc are excluded.  */
24648 gcc_assert (regno
< LR_REGNUM
);
24650 /* If we are in the normal text section we can use a single instance
24651 per compilation unit. If we are doing function sections, then we need
24652 an entry per section, since we can't rely on reachability. */
24653 if (in_section
== text_section
)
/* Mark that arm_file_end must emit the shared trampolines.  */
24655 thumb_call_reg_needed
= 1;
24657 if (thumb_call_via_label
[regno
] == NULL
)
24658 thumb_call_via_label
[regno
] = gen_label_rtx ();
24659 labelp
= thumb_call_via_label
+ regno
;
/* (else branch) per-function trampoline stored in cfun->machine.  */
24663 if (cfun
->machine
->call_via
[regno
] == NULL
)
24664 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
24665 labelp
= cfun
->machine
->call_via
+ regno
;
24668 output_asm_insn ("bl\t%a0", labelp
);
/* Expands a memory block copy for Thumb: peels 12- and 8-byte chunks via
   the movmem12b/movmem8b patterns, then finishes with word, halfword and
   byte moves for the tail.
   NOTE(review): extraction dropped original lines 24680-24682, 24684-24688,
   24690-24694 and others (the `while (len >= 12)' / `while (len >= 8)' /
   `while (len >= 4)' loop headers, len/offset bookkeeping, braces, and the
   offset arguments of the plus_constant calls); residue only — the chunked
   structure below is inferred from the visible pattern names, verify
   against the pristine source.  */
24672 /* Routines for generating rtl. */
24674 thumb_expand_movmemqi (rtx
*operands
)
/* Destination and source addresses forced into registers.  */
24676 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
24677 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
24678 HOST_WIDE_INT len
= INTVAL (operands
[2]);
24679 HOST_WIDE_INT offset
= 0;
/* 12-byte chunks (uses three scratch regs internally).  */
24683 emit_insn (gen_movmem12b (out
, in
, out
, in
));
/* 8-byte chunks.  */
24689 emit_insn (gen_movmem8b (out
, in
, out
, in
));
/* Remaining whole words.  */
24695 rtx reg
= gen_reg_rtx (SImode
);
24696 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
24697 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
/* Remaining halfword, addressed at the running `offset'.  */
24704 rtx reg
= gen_reg_rtx (HImode
);
24705 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
24706 plus_constant (Pmode
, in
,
24708 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
/* Trailing byte, if any.  */
24717 rtx reg
= gen_reg_rtx (QImode
);
24718 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
24719 plus_constant (Pmode
, in
,
24721 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
/* Reload helper: emits the thumb_movhi_clobber pattern to store a HImode
   value, using operands[2] as the scratch the pattern clobbers.
   NOTE(review): the return type (original line ~24727) and braces were
   dropped by extraction.  */
24728 thumb_reload_out_hi (rtx
*operands
)
24730 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
/* Maps a name-encoding prefix character C to the number of characters to
   strip, driven by the ARM_NAME_ENCODING_LENGTHS table macro.
   NOTE(review): the switch statement this macro expands into (and the
   default return 0, original lines 24737-24742) was dropped by extraction.  */
24733 /* Return the length of a function name prefix
24734 that starts with the character 'c'. */
24736 arm_get_strip_length (int c
)
24740 ARM_NAME_ENCODING_LENGTHS
/* Repeatedly advances NAME past every encoding prefix reported by
   arm_get_strip_length and returns the bare name.
   NOTE(review): return type, `skip' declaration, the `name += skip' loop
   body and the final return were dropped by extraction.  */
24745 /* Return a pointer to a function's name with any
24746 and all prefix encodings stripped from it. */
24748 arm_strip_name_encoding (const char *name
)
24752 while ((skip
= arm_get_strip_length (* name
)))
/* Prints NAME to STREAM: verbatim (plain fputs) if any stripped prefix
   character was '*', otherwise via %U so the user-label prefix (leading
   underscore, where configured) is prepended.
   NOTE(review): declarations of `skip'/`verbatim', the loop body advancing
   `name', and the if/else around the two output calls were dropped by
   extraction (gaps at original lines 24763-24766, 24768, 24770-24775).  */
24758 /* If there is a '*' anywhere in the name's prefix, then
24759 emit the stripped name verbatim, otherwise prepend an
24760 underscore if leading underscores are being used. */
24762 arm_asm_output_labelref (FILE *stream
, const char *name
)
24767 while ((skip
= arm_get_strip_length (* name
)))
/* Remember whether any prefix char requested verbatim output.  */
24769 verbatim
|= (*name
== '*');
24774 fputs (name
, stream
);
24776 asm_fprintf (stream
, "%U%s", name
);
/* Writes ".eabi_attribute NUM, VAL" and, under -fverbose-asm/-dA, appends
   the symbolic tag NAME as an assembler comment (numeric form keeps old
   gas versions working).  Non-static: arm-c.c calls it too.  */
24779 /* This function is used to emit an EABI tag and its associated value.
24780 We emit the numerical value of the tag in case the assembler does not
24781 support textual tags. (Eg gas prior to 2.20). If requested we include
24782 the tag name in a comment so that anyone reading the assembler output
24783 will know which tag is being set.
24785 This function is not static because arm-c.c needs it too. */
24788 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
24790 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
24791 if (flag_verbose_asm
|| flag_debug_asm
)
24792 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
24793 asm_fprintf (asm_out_file
, "\n");
24796 /* This function is used to print CPU tuning information as comment
24797 in assembler file. Pointers are not printed for now. */
24800 arm_print_tune_info (void)
24802 asm_fprintf (asm_out_file
, "\t@.tune parameters\n");
24803 asm_fprintf (asm_out_file
, "\t\t@constant_limit:\t%d\n",
24804 current_tune
->constant_limit
);
24805 asm_fprintf (asm_out_file
, "\t\t@max_insns_skipped:\t%d\n",
24806 current_tune
->max_insns_skipped
);
24807 asm_fprintf (asm_out_file
, "\t\t@prefetch.num_slots:\t%d\n",
24808 current_tune
->prefetch
.num_slots
);
24809 asm_fprintf (asm_out_file
, "\t\t@prefetch.l1_cache_size:\t%d\n",
24810 current_tune
->prefetch
.l1_cache_size
);
24811 asm_fprintf (asm_out_file
, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
24812 current_tune
->prefetch
.l1_cache_line_size
);
24813 asm_fprintf (asm_out_file
, "\t\t@prefer_constant_pool:\t%d\n",
24814 (int) current_tune
->prefer_constant_pool
);
24815 asm_fprintf (asm_out_file
, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
24816 asm_fprintf (asm_out_file
, "\t\t\t\ts&p\tcost\n");
24817 asm_fprintf (asm_out_file
, "\t\t\t\t00\t%d\n",
24818 current_tune
->branch_cost (false, false));
24819 asm_fprintf (asm_out_file
, "\t\t\t\t01\t%d\n",
24820 current_tune
->branch_cost (false, true));
24821 asm_fprintf (asm_out_file
, "\t\t\t\t10\t%d\n",
24822 current_tune
->branch_cost (true, false));
24823 asm_fprintf (asm_out_file
, "\t\t\t\t11\t%d\n",
24824 current_tune
->branch_cost (true, true));
24825 asm_fprintf (asm_out_file
, "\t\t@prefer_ldrd_strd:\t%d\n",
24826 (int) current_tune
->prefer_ldrd_strd
);
24827 asm_fprintf (asm_out_file
, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
24828 (int) current_tune
->logical_op_non_short_circuit_thumb
,
24829 (int) current_tune
->logical_op_non_short_circuit_arm
);
24830 asm_fprintf (asm_out_file
, "\t\t@prefer_neon_for_64bits:\t%d\n",
24831 (int) current_tune
->prefer_neon_for_64bits
);
24832 asm_fprintf (asm_out_file
,
24833 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
24834 (int) current_tune
->disparage_flag_setting_t16_encodings
);
24835 asm_fprintf (asm_out_file
, "\t\t@string_ops_prefer_neon:\t%d\n",
24836 (int) current_tune
->string_ops_prefer_neon
);
24837 asm_fprintf (asm_out_file
, "\t\t@max_insns_inline_memset:\t%d\n",
24838 current_tune
->max_insns_inline_memset
);
24839 asm_fprintf (asm_out_file
, "\t\t@fusible_ops:\t%u\n",
24840 current_tune
->fusible_ops
);
24841 asm_fprintf (asm_out_file
, "\t\t@sched_autopref:\t%d\n",
24842 (int) current_tune
->sched_autopref
);
24846 arm_file_start (void)
24852 if (arm_selected_arch
)
24854 /* armv7ve doesn't support any extensions. */
24855 if (strcmp (arm_selected_arch
->name
, "armv7ve") == 0)
24857 /* Keep backward compatability for assemblers
24858 which don't support armv7ve. */
24859 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
24860 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
24861 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
24862 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
24863 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
24867 const char* pos
= strchr (arm_selected_arch
->name
, '+');
24871 gcc_assert (strlen (arm_selected_arch
->name
)
24872 <= sizeof (buf
) / sizeof (*pos
));
24873 strncpy (buf
, arm_selected_arch
->name
,
24874 (pos
- arm_selected_arch
->name
) * sizeof (*pos
));
24875 buf
[pos
- arm_selected_arch
->name
] = '\0';
24876 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
24877 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
24880 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
24883 else if (strncmp (arm_selected_cpu
->name
, "generic", 7) == 0)
24884 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_cpu
->name
+ 8);
24887 const char* truncated_name
24888 = arm_rewrite_selected_cpu (arm_selected_cpu
->name
);
24889 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
24892 if (print_tune_info
)
24893 arm_print_tune_info ();
24895 if (! TARGET_SOFT_FLOAT
)
24897 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
24898 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
24900 if (TARGET_HARD_FLOAT_ABI
)
24901 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
24904 /* Some of these attributes only apply when the corresponding features
24905 are used. However we don't have any easy way of figuring this out.
24906 Conservatively record the setting that would have been used. */
24908 if (flag_rounding_math
)
24909 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
24911 if (!flag_unsafe_math_optimizations
)
24913 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
24914 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
24916 if (flag_signaling_nans
)
24917 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
24919 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
24920 flag_finite_math_only
? 1 : 3);
24922 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
24923 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
24924 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
24925 flag_short_enums
? 1 : 2);
24927 /* Tag_ABI_optimization_goals. */
24930 else if (optimize
>= 2)
24936 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
24938 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
24941 if (arm_fp16_format
)
24942 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
24943 (int) arm_fp16_format
);
24945 if (arm_lang_output_object_attributes_hook
)
24946 arm_lang_output_object_attributes_hook();
24949 default_file_start ();
/* TARGET_ASM_FILE_END hook: emits the .note.GNU-stack marker when needed,
   then — if thumb_call_via_reg recorded any shared trampolines — switches
   back to .text and emits one 2-byte-aligned "bx rN" stub per recorded
   register label.
   NOTE(review): extraction dropped the `regno' declaration, the early
   `return', braces, and the `if (label == NULL) continue;' guard inside
   the loop (gaps at original lines 24954-24956, 24960, 24962-24963,
   24967, 24969, 24971-24973, 24977-).  */
24953 arm_file_end (void)
24957 if (NEED_INDICATE_EXEC_STACK
)
24958 /* Add .note.GNU-stack. */
24959 file_end_indicate_exec_stack ();
/* Nothing else to do unless call-via stubs were requested.  */
24961 if (! thumb_call_reg_needed
)
24964 switch_to_section (text_section
);
24965 asm_fprintf (asm_out_file
, "\t.code 16\n");
24966 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
24968 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
24970 rtx label
= thumb_call_via_label
[regno
];
24974 targetm
.asm_out
.internal_label (asm_out_file
, "L",
24975 CODE_LABEL_NUMBER (label
));
24976 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
/* TARGET_ENCODE_SECTION_INFO (non-PE variant, per the #endif): marks
   constant decls' SYMBOL_REFs with SYMBOL_REF_FLAG when optimizing, so
   text-segment symbols can be addressed without a constant-pool load;
   then defers to the default hook.
   NOTE(review): original lines 24986-24988 and 24990 (end of the leading
   comment, `static void', opening brace) were dropped by extraction.  */
24982 /* Symbols in the text segment can be accessed without indirecting via the
24983 constant pool; it may take an extra binary operation, but this is still
24984 faster than indirecting via memory. Don't do this when not optimizing,
24985 since we won't be calculating al of the offsets necessary to do this
24989 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
24991 if (optimize
> 0 && TREE_CONSTANT (decl
))
24992 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
24994 default_encode_section_info (decl
, rtl
, first
);
24996 #endif /* !ARM_PE */
/* TARGET_ASM_INTERNAL_LABEL hook: if the conditional-execution state
   machine (ccfsm) was waiting for exactly this "L" label, reset it —
   the label is a branch target, so conditionalisation across it must
   stop — then emit the label normally.  */
24999 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
25001 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
25002 && !strcmp (prefix
, "L"))
25004 arm_ccfsm_state
= 0;
25005 arm_target_insn
= NULL
;
25007 default_internal_label (stream
, prefix
, labelno
);
25010 /* Output code to add DELTA to the first argument, and then jump
25011 to FUNCTION. Used for C++ multiple inheritance. */
25014 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
25015 HOST_WIDE_INT
, tree function
)
25017 static int thunk_label
= 0;
25020 int mi_delta
= delta
;
25021 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
25023 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
25026 mi_delta
= - mi_delta
;
25028 final_start_function (emit_barrier (), file
, 1);
25032 int labelno
= thunk_label
++;
25033 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
25034 /* Thunks are entered in arm mode when avaiable. */
25035 if (TARGET_THUMB1_ONLY
)
25037 /* push r3 so we can use it as a temporary. */
25038 /* TODO: Omit this save if r3 is not used. */
25039 fputs ("\tpush {r3}\n", file
);
25040 fputs ("\tldr\tr3, ", file
);
25044 fputs ("\tldr\tr12, ", file
);
25046 assemble_name (file
, label
);
25047 fputc ('\n', file
);
25050 /* If we are generating PIC, the ldr instruction below loads
25051 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25052 the address of the add + 8, so we have:
25054 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25057 Note that we have "+ 1" because some versions of GNU ld
25058 don't set the low bit of the result for R_ARM_REL32
25059 relocations against thumb function symbols.
25060 On ARMv6M this is +4, not +8. */
25061 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
25062 assemble_name (file
, labelpc
);
25063 fputs (":\n", file
);
25064 if (TARGET_THUMB1_ONLY
)
25066 /* This is 2 insns after the start of the thunk, so we know it
25067 is 4-byte aligned. */
25068 fputs ("\tadd\tr3, pc, r3\n", file
);
25069 fputs ("\tmov r12, r3\n", file
);
25072 fputs ("\tadd\tr12, pc, r12\n", file
);
25074 else if (TARGET_THUMB1_ONLY
)
25075 fputs ("\tmov r12, r3\n", file
);
25077 if (TARGET_THUMB1_ONLY
)
25079 if (mi_delta
> 255)
25081 fputs ("\tldr\tr3, ", file
);
25082 assemble_name (file
, label
);
25083 fputs ("+4\n", file
);
25084 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
25085 mi_op
, this_regno
, this_regno
);
25087 else if (mi_delta
!= 0)
25089 /* Thumb1 unified syntax requires s suffix in instruction name when
25090 one of the operands is immediate. */
25091 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
25092 mi_op
, this_regno
, this_regno
,
25098 /* TODO: Use movw/movt for large constants when available. */
25099 while (mi_delta
!= 0)
25101 if ((mi_delta
& (3 << shift
)) == 0)
25105 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
25106 mi_op
, this_regno
, this_regno
,
25107 mi_delta
& (0xff << shift
));
25108 mi_delta
&= ~(0xff << shift
);
25115 if (TARGET_THUMB1_ONLY
)
25116 fputs ("\tpop\t{r3}\n", file
);
25118 fprintf (file
, "\tbx\tr12\n");
25119 ASM_OUTPUT_ALIGN (file
, 2);
25120 assemble_name (file
, label
);
25121 fputs (":\n", file
);
25124 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25125 rtx tem
= XEXP (DECL_RTL (function
), 0);
25126 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25127 pipeline offset is four rather than eight. Adjust the offset
25129 tem
= plus_constant (GET_MODE (tem
), tem
,
25130 TARGET_THUMB1_ONLY
? -3 : -7);
25131 tem
= gen_rtx_MINUS (GET_MODE (tem
),
25133 gen_rtx_SYMBOL_REF (Pmode
,
25134 ggc_strdup (labelpc
)));
25135 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
25138 /* Output ".word .LTHUNKn". */
25139 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
25141 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
25142 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
25146 fputs ("\tb\t", file
);
25147 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
25148 if (NEED_PLT_RELOC
)
25149 fputs ("(PLT)", file
);
25150 fputc ('\n', file
);
25153 final_end_function ();
25156 /* MI thunk handling for TARGET_32BIT. */
25159 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
25160 HOST_WIDE_INT vcall_offset
, tree function
)
25162 /* On ARM, this_regno is R0 or R1 depending on
25163 whether the function returns an aggregate or not.
25165 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
25167 ? R1_REGNUM
: R0_REGNUM
);
25169 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
25170 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
25171 reload_completed
= 1;
25172 emit_note (NOTE_INSN_PROLOGUE_END
);
25174 /* Add DELTA to THIS_RTX. */
25176 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
25177 delta
, this_rtx
, this_rtx
, false);
25179 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
25180 if (vcall_offset
!= 0)
25182 /* Load *THIS_RTX. */
25183 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
25184 /* Compute *THIS_RTX + VCALL_OFFSET. */
25185 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
25187 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
25188 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
25189 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
25192 /* Generate a tail call to the target function. */
25193 if (!TREE_USED (function
))
25195 assemble_external (function
);
25196 TREE_USED (function
) = 1;
25198 rtx funexp
= XEXP (DECL_RTL (function
), 0);
25199 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
25200 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
25201 SIBLING_CALL_P (insn
) = 1;
25203 insn
= get_insns ();
25204 shorten_branches (insn
);
25205 final_start_function (insn
, file
, 1);
25206 final (insn
, file
, 1);
25207 final_end_function ();
25209 /* Stop pretending this is a post-reload pass. */
25210 reload_completed
= 0;
/* TARGET_ASM_OUTPUT_MI_THUNK: dispatches to the 32-bit (ARM/Thumb-2) or
   Thumb-1 thunk writer.
   NOTE(review): original lines 25215-25216 and 25219-25220, 25222 (return
   type, opening brace, and the if/else — presumably `if (TARGET_32BIT)')
   were dropped by extraction.  */
25213 /* Output code to add DELTA to the first argument, and then jump
25214 to FUNCTION. Used for C++ multiple inheritance. */
25217 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
25218 HOST_WIDE_INT vcall_offset
, tree function
)
25221 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
25223 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
/* Prints a CONST_VECTOR as a single "0x..." hex literal, element width
   chosen from the vector mode (8/4/2 hex digits for V2SI/V4HI/V8QI),
   iterating elements from high index down so the most-significant element
   is printed first.
   NOTE(review): the return type, braces, `i'/`element' declarations and
   the trailing return were dropped by extraction (gaps at original lines
   25228-25229, 25231, 25240-25241, 25244-25246, 25249-).  */
25227 arm_emit_vector_const (FILE *file
, rtx x
)
25230 const char * pattern
;
25232 gcc_assert (GET_CODE (x
) == CONST_VECTOR
)
;
25234 switch (GET_MODE (x
))
25236 case V2SImode
: pattern
= "%08x"; break;
25237 case V4HImode
: pattern
= "%04x"; break;
25238 case V8QImode
: pattern
= "%02x"; break;
25239 default: gcc_unreachable ();
25242 fprintf (file
, "0x");
25243 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
25247 element
= CONST_VECTOR_ELT (x
, i
);
25248 fprintf (file
, pattern
, INTVAL (element
));
/* Assembles an HFmode constant as 2 data bytes padded with 2 zero bytes
   to fill the 4-byte word that ldr will load; the padding goes before the
   value on big-endian targets and after it on little-endian.  */
25254 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25255 HFmode constant pool entries are actually loaded with ldr. */
25257 arm_emit_fp16_const (rtx c
)
/* Convert the REAL_VALUE to its 16-bit target representation.  */
25261 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
25262 if (WORDS_BIG_ENDIAN
)
25263 assemble_zeros (2);
25264 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
25265 if (!WORDS_BIG_ENDIAN
)
25266 assemble_zeros (2);
/* iWMMXt: returns the template for loading a GR (wCGR) register.  In-range
   addresses use wldrw directly; out-of-range ones are expanded into a
   bounce through a stack slot — push a core reg, ldr the value into it,
   tmcr it into the wCGR register, pop.
   NOTE(review): the return type and the declarations of `reg', `offset',
   `wcgr' and `sum' (original lines 25271-25276), plus operand shuffling
   around lines 25287/25289/25291 and the final return, were dropped by
   extraction.  */
25270 arm_output_load_gr (rtx
*operands
)
/* Pattern-match MEM (PLUS (REG, CONST_INT)); anything else, or an offset
   already within +/-1024, can be handled by wldrw itself.  */
25277 if (!MEM_P (operands
[1])
25278 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
25279 || !REG_P (reg
= XEXP (sum
, 0))
25280 || !CONST_INT_P (offset
= XEXP (sum
, 1))
25281 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
25282 return "wldrw%?\t%0, %1";
25284 /* Fix up an out-of-range load of a GR register. */
25285 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
25286 wcgr
= operands
[0];
25288 output_asm_insn ("ldr%?\t%0, %1", operands
);
25290 operands
[0] = wcgr
;
25292 output_asm_insn ("tmcr%?\t%0, %1", operands
);
25293 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
/* TARGET_SETUP_INCOMING_VARARGS: computes how many of the four argument
   registers remain after the named args and sets *pretend_size so the
   prologue spills them, making anonymous args contiguous on the stack.
   For AAPCS the count comes from aapcs_ncrn (rounded up for arguments
   needing doubleword alignment); legacy ABIs use pcum->nregs.
   NOTE(review): the full parameter list (machine_mode mode, tree type,
   int *pretend_size — original lines 25307-25309), braces, and the
   `nregs++'/else lines at 25320-25322 were dropped by extraction.  */
25298 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25300 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25301 named arg and all anonymous args onto the stack.
25302 XXX I know the prologue shouldn't be pushing registers, but it is faster
25306 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
25310 int second_time ATTRIBUTE_UNUSED
)
25312 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
/* Record that this function takes anonymous args (the epilogue needs it).  */
25315 cfun
->machine
->uses_anonymous_args
= 1;
25316 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
25318 nregs
= pcum
->aapcs_ncrn
;
/* Skip an odd register so a doubleword-aligned arg starts even.  */
25319 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
25323 nregs
= pcum
->nregs
;
25325 if (nregs
< NUM_ARG_REGS
)
25326 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
25329 /* We can't rely on the caller doing the proper promotion when
25330 using APCS or ATPCS. */
25333 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
25335 return !TARGET_AAPCS_BASED
;
25338 static machine_mode
25339 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
25341 int *punsignedp ATTRIBUTE_UNUSED
,
25342 const_tree fntype ATTRIBUTE_UNUSED
,
25343 int for_return ATTRIBUTE_UNUSED
)
25345 if (GET_MODE_CLASS (mode
) == MODE_INT
25346 && GET_MODE_SIZE (mode
) < 4)
25352 /* AAPCS based ABIs use short enums by default. */
25355 arm_default_short_enums (void)
25357 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
25361 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25364 arm_align_anon_bitfield (void)
25366 return TARGET_AAPCS_BASED
;
25370 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25373 arm_cxx_guard_type (void)
25375 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
25379 /* The EABI says test the least significant bit of a guard variable. */
25382 arm_cxx_guard_mask_bit (void)
25384 return TARGET_AAPCS_BASED
;
25388 /* The EABI specifies that all array cookies are 8 bytes long. */
25391 arm_get_cookie_size (tree type
)
25395 if (!TARGET_AAPCS_BASED
)
25396 return default_cxx_get_cookie_size (type
);
25398 size
= build_int_cst (sizetype
, 8);
25403 /* The EABI says that array cookies should also contain the element size. */
25406 arm_cookie_has_size (void)
25408 return TARGET_AAPCS_BASED
;
25412 /* The EABI says constructors and destructors should return a pointer to
25413 the object constructed/destroyed. */
25416 arm_cxx_cdtor_returns_this (void)
25418 return TARGET_AAPCS_BASED
;
25421 /* The EABI says that an inline function may never be the key
25425 arm_cxx_key_method_may_be_inline (void)
25427 return !TARGET_AAPCS_BASED
;
25431 arm_cxx_determine_class_data_visibility (tree decl
)
25433 if (!TARGET_AAPCS_BASED
25434 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
25437 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
25438 is exported. However, on systems without dynamic vague linkage,
25439 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
25440 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
25441 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
25443 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
25444 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
25448 arm_cxx_class_data_always_comdat (void)
25450 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
25451 vague linkage if the class has no key function. */
25452 return !TARGET_AAPCS_BASED
;
25456 /* The EABI says __aeabi_atexit should be used to register static
25460 arm_cxx_use_aeabi_atexit (void)
25462 return TARGET_AAPCS_BASED
;
25467 arm_set_return_address (rtx source
, rtx scratch
)
25469 arm_stack_offsets
*offsets
;
25470 HOST_WIDE_INT delta
;
25472 unsigned long saved_regs
;
25474 offsets
= arm_get_frame_offsets ();
25475 saved_regs
= offsets
->saved_regs_mask
;
25477 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
25478 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
25481 if (frame_pointer_needed
)
25482 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
25485 /* LR will be the first saved register. */
25486 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
25491 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
25492 GEN_INT (delta
& ~4095)));
25497 addr
= stack_pointer_rtx
;
25499 addr
= plus_constant (Pmode
, addr
, delta
);
25501 /* The store needs to be marked as frame related in order to prevent
25502 DSE from deleting it as dead if it is based on fp. */
25503 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
25504 RTX_FRAME_RELATED_P (insn
) = 1;
25505 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
25511 thumb_set_return_address (rtx source
, rtx scratch
)
25513 arm_stack_offsets
*offsets
;
25514 HOST_WIDE_INT delta
;
25515 HOST_WIDE_INT limit
;
25518 unsigned long mask
;
25522 offsets
= arm_get_frame_offsets ();
25523 mask
= offsets
->saved_regs_mask
;
25524 if (mask
& (1 << LR_REGNUM
))
25527 /* Find the saved regs. */
25528 if (frame_pointer_needed
)
25530 delta
= offsets
->soft_frame
- offsets
->saved_args
;
25531 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
25537 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
25540 /* Allow for the stack frame. */
25541 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
25543 /* The link register is always the first saved register. */
25546 /* Construct the address. */
25547 addr
= gen_rtx_REG (SImode
, reg
);
25550 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
25551 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
25555 addr
= plus_constant (Pmode
, addr
, delta
);
25557 /* The store needs to be marked as frame related in order to prevent
25558 DSE from deleting it as dead if it is based on fp. */
25559 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
25560 RTX_FRAME_RELATED_P (insn
) = 1;
25561 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
25564 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
25567 /* Implements target hook vector_mode_supported_p. */
25569 arm_vector_mode_supported_p (machine_mode mode
)
25571 /* Neon also supports V2SImode, etc. listed in the clause below. */
25572 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
25573 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
25574 || mode
== V2DImode
|| mode
== V8HFmode
))
25577 if ((TARGET_NEON
|| TARGET_IWMMXT
)
25578 && ((mode
== V2SImode
)
25579 || (mode
== V4HImode
)
25580 || (mode
== V8QImode
)))
25583 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
25584 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
25585 || mode
== V2HAmode
))
25591 /* Implements target hook array_mode_supported_p. */
25594 arm_array_mode_supported_p (machine_mode mode
,
25595 unsigned HOST_WIDE_INT nelems
)
25598 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
25599 && (nelems
>= 2 && nelems
<= 4))
25605 /* Use the option -mvectorize-with-neon-double to override the use of quardword
25606 registers when autovectorizing for Neon, at least until multiple vector
25607 widths are supported properly by the middle-end. */
25609 static machine_mode
25610 arm_preferred_simd_mode (machine_mode mode
)
25616 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
25618 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
25620 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
25622 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
25624 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
25631 if (TARGET_REALLY_IWMMXT
)
25647 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
25649 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
25650 using r0-r4 for function arguments, r7 for the stack frame and don't have
25651 enough left over to do doubleword arithmetic. For Thumb-2 all the
25652 potentially problematic instructions accept high registers so this is not
25653 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
25654 that require many low registers. */
25656 arm_class_likely_spilled_p (reg_class_t rclass
)
25658 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
25659 || rclass
== CC_REG
)
25665 /* Implements target hook small_register_classes_for_mode_p. */
25667 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
25669 return TARGET_THUMB1
;
25672 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
25673 ARM insns and therefore guarantee that the shift count is modulo 256.
25674 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
25675 guarantee no particular behavior for out-of-range counts. */
25677 static unsigned HOST_WIDE_INT
25678 arm_shift_truncation_mask (machine_mode mode
)
25680 return mode
== SImode
? 255 : 0;
25684 /* Map internal gcc register numbers to DWARF2 register numbers. */
25687 arm_dbx_register_number (unsigned int regno
)
25692 if (IS_VFP_REGNUM (regno
))
25694 /* See comment in arm_dwarf_register_span. */
25695 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
25696 return 64 + regno
- FIRST_VFP_REGNUM
;
25698 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
25701 if (IS_IWMMXT_GR_REGNUM (regno
))
25702 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
25704 if (IS_IWMMXT_REGNUM (regno
))
25705 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
25707 return DWARF_FRAME_REGISTERS
;
25710 /* Dwarf models VFPv3 registers as 32 64-bit registers.
25711 GCC models tham as 64 32-bit registers, so we need to describe this to
25712 the DWARF generation code. Other registers can use the default. */
25714 arm_dwarf_register_span (rtx rtl
)
25722 regno
= REGNO (rtl
);
25723 if (!IS_VFP_REGNUM (regno
))
25726 /* XXX FIXME: The EABI defines two VFP register ranges:
25727 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
25729 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
25730 corresponding D register. Until GDB supports this, we shall use the
25731 legacy encodings. We also use these encodings for D0-D15 for
25732 compatibility with older debuggers. */
25733 mode
= GET_MODE (rtl
);
25734 if (GET_MODE_SIZE (mode
) < 8)
25737 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
25739 nregs
= GET_MODE_SIZE (mode
) / 4;
25740 for (i
= 0; i
< nregs
; i
+= 2)
25741 if (TARGET_BIG_END
)
25743 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
25744 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
25748 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
25749 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
25754 nregs
= GET_MODE_SIZE (mode
) / 8;
25755 for (i
= 0; i
< nregs
; i
++)
25756 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
25759 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
25762 #if ARM_UNWIND_INFO
25763 /* Emit unwind directives for a store-multiple instruction or stack pointer
25764 push during alignment.
25765 These should only ever be generated by the function prologue code, so
25766 expect them to have a particular form.
25767 The store-multiple instruction sometimes pushes pc as the last register,
25768 although it should not be tracked into unwind information, or for -Os
25769 sometimes pushes some dummy registers before first register that needs
25770 to be tracked in unwind information; such dummy registers are there just
25771 to avoid separate stack adjustment, and will not be restored in the
25775 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
25778 HOST_WIDE_INT offset
;
25779 HOST_WIDE_INT nregs
;
25783 unsigned padfirst
= 0, padlast
= 0;
25786 e
= XVECEXP (p
, 0, 0);
25787 gcc_assert (GET_CODE (e
) == SET
);
25789 /* First insn will adjust the stack pointer. */
25790 gcc_assert (GET_CODE (e
) == SET
25791 && REG_P (SET_DEST (e
))
25792 && REGNO (SET_DEST (e
)) == SP_REGNUM
25793 && GET_CODE (SET_SRC (e
)) == PLUS
);
25795 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
25796 nregs
= XVECLEN (p
, 0) - 1;
25797 gcc_assert (nregs
);
25799 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
25802 /* For -Os dummy registers can be pushed at the beginning to
25803 avoid separate stack pointer adjustment. */
25804 e
= XVECEXP (p
, 0, 1);
25805 e
= XEXP (SET_DEST (e
), 0);
25806 if (GET_CODE (e
) == PLUS
)
25807 padfirst
= INTVAL (XEXP (e
, 1));
25808 gcc_assert (padfirst
== 0 || optimize_size
);
25809 /* The function prologue may also push pc, but not annotate it as it is
25810 never restored. We turn this into a stack pointer adjustment. */
25811 e
= XVECEXP (p
, 0, nregs
);
25812 e
= XEXP (SET_DEST (e
), 0);
25813 if (GET_CODE (e
) == PLUS
)
25814 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
25816 padlast
= offset
- 4;
25817 gcc_assert (padlast
== 0 || padlast
== 4);
25819 fprintf (asm_out_file
, "\t.pad #4\n");
25821 fprintf (asm_out_file
, "\t.save {");
25823 else if (IS_VFP_REGNUM (reg
))
25826 fprintf (asm_out_file
, "\t.vsave {");
25829 /* Unknown register type. */
25830 gcc_unreachable ();
25832 /* If the stack increment doesn't match the size of the saved registers,
25833 something has gone horribly wrong. */
25834 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
25838 /* The remaining insns will describe the stores. */
25839 for (i
= 1; i
<= nregs
; i
++)
25841 /* Expect (set (mem <addr>) (reg)).
25842 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
25843 e
= XVECEXP (p
, 0, i
);
25844 gcc_assert (GET_CODE (e
) == SET
25845 && MEM_P (SET_DEST (e
))
25846 && REG_P (SET_SRC (e
)));
25848 reg
= REGNO (SET_SRC (e
));
25849 gcc_assert (reg
>= lastreg
);
25852 fprintf (asm_out_file
, ", ");
25853 /* We can't use %r for vfp because we need to use the
25854 double precision register names. */
25855 if (IS_VFP_REGNUM (reg
))
25856 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
25858 asm_fprintf (asm_out_file
, "%r", reg
);
25862 /* Check that the addresses are consecutive. */
25863 e
= XEXP (SET_DEST (e
), 0);
25864 if (GET_CODE (e
) == PLUS
)
25865 gcc_assert (REG_P (XEXP (e
, 0))
25866 && REGNO (XEXP (e
, 0)) == SP_REGNUM
25867 && CONST_INT_P (XEXP (e
, 1))
25868 && offset
== INTVAL (XEXP (e
, 1)));
25872 && REGNO (e
) == SP_REGNUM
);
25873 offset
+= reg_size
;
25876 fprintf (asm_out_file
, "}\n");
25878 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
25881 /* Emit unwind directives for a SET. */
25884 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
25892 switch (GET_CODE (e0
))
25895 /* Pushing a single register. */
25896 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
25897 || !REG_P (XEXP (XEXP (e0
, 0), 0))
25898 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
25901 asm_fprintf (asm_out_file
, "\t.save ");
25902 if (IS_VFP_REGNUM (REGNO (e1
)))
25903 asm_fprintf(asm_out_file
, "{d%d}\n",
25904 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
25906 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
25910 if (REGNO (e0
) == SP_REGNUM
)
25912 /* A stack increment. */
25913 if (GET_CODE (e1
) != PLUS
25914 || !REG_P (XEXP (e1
, 0))
25915 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
25916 || !CONST_INT_P (XEXP (e1
, 1)))
25919 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
25920 -INTVAL (XEXP (e1
, 1)));
25922 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
25924 HOST_WIDE_INT offset
;
25926 if (GET_CODE (e1
) == PLUS
)
25928 if (!REG_P (XEXP (e1
, 0))
25929 || !CONST_INT_P (XEXP (e1
, 1)))
25931 reg
= REGNO (XEXP (e1
, 0));
25932 offset
= INTVAL (XEXP (e1
, 1));
25933 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
25934 HARD_FRAME_POINTER_REGNUM
, reg
,
25937 else if (REG_P (e1
))
25940 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
25941 HARD_FRAME_POINTER_REGNUM
, reg
);
25946 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
25948 /* Move from sp to reg. */
25949 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
25951 else if (GET_CODE (e1
) == PLUS
25952 && REG_P (XEXP (e1
, 0))
25953 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
25954 && CONST_INT_P (XEXP (e1
, 1)))
25956 /* Set reg to offset from sp. */
25957 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
25958 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
25970 /* Emit unwind directives for the given insn. */
25973 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
25976 bool handled_one
= false;
25978 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
25981 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
25982 && (TREE_NOTHROW (current_function_decl
)
25983 || crtl
->all_throwers_are_sibcalls
))
25986 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
25989 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
25991 switch (REG_NOTE_KIND (note
))
25993 case REG_FRAME_RELATED_EXPR
:
25994 pat
= XEXP (note
, 0);
25997 case REG_CFA_REGISTER
:
25998 pat
= XEXP (note
, 0);
26001 pat
= PATTERN (insn
);
26002 if (GET_CODE (pat
) == PARALLEL
)
26003 pat
= XVECEXP (pat
, 0, 0);
26006 /* Only emitted for IS_STACKALIGN re-alignment. */
26011 src
= SET_SRC (pat
);
26012 dest
= SET_DEST (pat
);
26014 gcc_assert (src
== stack_pointer_rtx
);
26015 reg
= REGNO (dest
);
26016 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26019 handled_one
= true;
26022 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26023 to get correct dwarf information for shrink-wrap. We should not
26024 emit unwind information for it because these are used either for
26025 pretend arguments or notes to adjust sp and restore registers from
26027 case REG_CFA_DEF_CFA
:
26028 case REG_CFA_ADJUST_CFA
:
26029 case REG_CFA_RESTORE
:
26032 case REG_CFA_EXPRESSION
:
26033 case REG_CFA_OFFSET
:
26034 /* ??? Only handling here what we actually emit. */
26035 gcc_unreachable ();
26043 pat
= PATTERN (insn
);
26046 switch (GET_CODE (pat
))
26049 arm_unwind_emit_set (asm_out_file
, pat
);
26053 /* Store multiple. */
26054 arm_unwind_emit_sequence (asm_out_file
, pat
);
26063 /* Output a reference from a function exception table to the type_info
26064 object X. The EABI specifies that the symbol should be relocated by
26065 an R_ARM_TARGET2 relocation. */
26068 arm_output_ttype (rtx x
)
26070 fputs ("\t.word\t", asm_out_file
);
26071 output_addr_const (asm_out_file
, x
);
26072 /* Use special relocations for symbol references. */
26073 if (!CONST_INT_P (x
))
26074 fputs ("(TARGET2)", asm_out_file
);
26075 fputc ('\n', asm_out_file
);
26080 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26083 arm_asm_emit_except_personality (rtx personality
)
26085 fputs ("\t.personality\t", asm_out_file
);
26086 output_addr_const (asm_out_file
, personality
);
26087 fputc ('\n', asm_out_file
);
26089 #endif /* ARM_UNWIND_INFO */
26091 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26094 arm_asm_init_sections (void)
26096 #if ARM_UNWIND_INFO
26097 exception_section
= get_unnamed_section (0, output_section_asm_op
,
26099 #endif /* ARM_UNWIND_INFO */
26101 #ifdef OBJECT_FORMAT_ELF
26102 if (target_pure_code
)
26103 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
26107 /* Output unwind directives for the start/end of a function. */
26110 arm_output_fn_unwind (FILE * f
, bool prologue
)
26112 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
26116 fputs ("\t.fnstart\n", f
);
26119 /* If this function will never be unwound, then mark it as such.
26120 The came condition is used in arm_unwind_emit to suppress
26121 the frame annotations. */
26122 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
26123 && (TREE_NOTHROW (current_function_decl
)
26124 || crtl
->all_throwers_are_sibcalls
))
26125 fputs("\t.cantunwind\n", f
);
26127 fputs ("\t.fnend\n", f
);
26132 arm_emit_tls_decoration (FILE *fp
, rtx x
)
26134 enum tls_reloc reloc
;
26137 val
= XVECEXP (x
, 0, 0);
26138 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
26140 output_addr_const (fp
, val
);
26145 fputs ("(tlsgd)", fp
);
26148 fputs ("(tlsldm)", fp
);
26151 fputs ("(tlsldo)", fp
);
26154 fputs ("(gottpoff)", fp
);
26157 fputs ("(tpoff)", fp
);
26160 fputs ("(tlsdesc)", fp
);
26163 gcc_unreachable ();
26172 fputs (" + (. - ", fp
);
26173 output_addr_const (fp
, XVECEXP (x
, 0, 2));
26174 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26175 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
26176 output_addr_const (fp
, XVECEXP (x
, 0, 3));
26186 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26189 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
26191 gcc_assert (size
== 4);
26192 fputs ("\t.word\t", file
);
26193 output_addr_const (file
, x
);
26194 fputs ("(tlsldo)", file
);
26197 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26200 arm_output_addr_const_extra (FILE *fp
, rtx x
)
26202 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
26203 return arm_emit_tls_decoration (fp
, x
);
26204 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
26207 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
26209 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
26210 assemble_name_raw (fp
, label
);
26214 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
26216 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
26220 output_addr_const (fp
, XVECEXP (x
, 0, 0));
26224 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
26226 output_addr_const (fp
, XVECEXP (x
, 0, 0));
26230 output_addr_const (fp
, XVECEXP (x
, 0, 1));
26234 else if (GET_CODE (x
) == CONST_VECTOR
)
26235 return arm_emit_vector_const (fp
, x
);
26240 /* Output assembly for a shift instruction.
26241 SET_FLAGS determines how the instruction modifies the condition codes.
26242 0 - Do not set condition codes.
26243 1 - Set condition codes.
26244 2 - Use smallest instruction. */
26246 arm_output_shift(rtx
* operands
, int set_flags
)
26249 static const char flag_chars
[3] = {'?', '.', '!'};
26254 c
= flag_chars
[set_flags
];
26255 shift
= shift_op(operands
[3], &val
);
26259 operands
[2] = GEN_INT(val
);
26260 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
26263 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
26265 output_asm_insn (pattern
, operands
);
26269 /* Output assembly for a WMMX immediate shift instruction. */
26271 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
26273 int shift
= INTVAL (operands
[2]);
26275 machine_mode opmode
= GET_MODE (operands
[0]);
26277 gcc_assert (shift
>= 0);
26279 /* If the shift value in the register versions is > 63 (for D qualifier),
26280 31 (for W qualifier) or 15 (for H qualifier). */
26281 if (((opmode
== V4HImode
) && (shift
> 15))
26282 || ((opmode
== V2SImode
) && (shift
> 31))
26283 || ((opmode
== DImode
) && (shift
> 63)))
26287 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
26288 output_asm_insn (templ
, operands
);
26289 if (opmode
== DImode
)
26291 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
26292 output_asm_insn (templ
, operands
);
26297 /* The destination register will contain all zeros. */
26298 sprintf (templ
, "wzero\t%%0");
26299 output_asm_insn (templ
, operands
);
26304 if ((opmode
== DImode
) && (shift
> 32))
26306 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
26307 output_asm_insn (templ
, operands
);
26308 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
26309 output_asm_insn (templ
, operands
);
26313 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
26314 output_asm_insn (templ
, operands
);
26319 /* Output assembly for a WMMX tinsr instruction. */
26321 arm_output_iwmmxt_tinsr (rtx
*operands
)
26323 int mask
= INTVAL (operands
[3]);
26326 int units
= mode_nunits
[GET_MODE (operands
[0])];
26327 gcc_assert ((mask
& (mask
- 1)) == 0);
26328 for (i
= 0; i
< units
; ++i
)
26330 if ((mask
& 0x01) == 1)
26336 gcc_assert (i
< units
);
26338 switch (GET_MODE (operands
[0]))
26341 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
26344 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
26347 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
26350 gcc_unreachable ();
26353 output_asm_insn (templ
, operands
);
26358 /* Output a Thumb-1 casesi dispatch sequence. */
26360 thumb1_output_casesi (rtx
*operands
)
26362 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
26364 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
26366 switch (GET_MODE(diff_vec
))
26369 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
26370 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26372 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
26373 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26375 return "bl\t%___gnu_thumb1_case_si";
26377 gcc_unreachable ();
26381 /* Output a Thumb-2 casesi instruction. */
26383 thumb2_output_casesi (rtx
*operands
)
26385 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
26387 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
26389 output_asm_insn ("cmp\t%0, %1", operands
);
26390 output_asm_insn ("bhi\t%l3", operands
);
26391 switch (GET_MODE(diff_vec
))
26394 return "tbb\t[%|pc, %0]";
26396 return "tbh\t[%|pc, %0, lsl #1]";
26400 output_asm_insn ("adr\t%4, %l2", operands
);
26401 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
26402 output_asm_insn ("add\t%4, %4, %5", operands
);
26407 output_asm_insn ("adr\t%4, %l2", operands
);
26408 return "ldr\t%|pc, [%4, %0, lsl #2]";
26411 gcc_unreachable ();
26415 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
26416 per-core tuning structs. */
26418 arm_issue_rate (void)
26420 return current_tune
->issue_rate
;
26423 /* Return how many instructions should scheduler lookahead to choose the
26426 arm_first_cycle_multipass_dfa_lookahead (void)
26428 int issue_rate
= arm_issue_rate ();
26430 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
26433 /* Enable modeling of L2 auto-prefetcher. */
26435 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
26437 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
26441 arm_mangle_type (const_tree type
)
26443 /* The ARM ABI documents (10th October 2008) say that "__va_list"
26444 has to be managled as if it is in the "std" namespace. */
26445 if (TARGET_AAPCS_BASED
26446 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
26447 return "St9__va_list";
26449 /* Half-precision float. */
26450 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
26453 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
26455 if (TYPE_NAME (type
) != NULL
)
26456 return arm_mangle_builtin_type (type
);
26458 /* Use the default mangling. */
26462 /* Order of allocation of core registers for Thumb: this allocation is
26463 written over the corresponding initial entries of the array
26464 initialized with REG_ALLOC_ORDER. We allocate all low registers
26465 first. Saving and restoring a low register is usually cheaper than
26466 using a call-clobbered high register. */
26468 static const int thumb_core_reg_alloc_order
[] =
26470 3, 2, 1, 0, 4, 5, 6, 7,
26471 14, 12, 8, 9, 10, 11
26474 /* Adjust register allocation order when compiling for Thumb. */
26477 arm_order_regs_for_local_alloc (void)
26479 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
26480 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
26482 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
26483 sizeof (thumb_core_reg_alloc_order
));
26486 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
26489 arm_frame_pointer_required (void)
26491 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
26494 /* If the function receives nonlocal gotos, it needs to save the frame
26495 pointer in the nonlocal_goto_save_area object. */
26496 if (cfun
->has_nonlocal_label
)
26499 /* The frame pointer is required for non-leaf APCS frames. */
26500 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !leaf_function_p ())
26503 /* If we are probing the stack in the prologue, we will have a faulting
26504 instruction prior to the stack adjustment and this requires a frame
26505 pointer if we want to catch the exception using the EABI unwinder. */
26506 if (!IS_INTERRUPT (arm_current_func_type ())
26507 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
26508 && arm_except_unwind_info (&global_options
) == UI_TARGET
26509 && cfun
->can_throw_non_call_exceptions
)
26511 HOST_WIDE_INT size
= get_frame_size ();
26513 /* That's irrelevant if there is no stack adjustment. */
26517 /* That's relevant only if there is a stack probe. */
26518 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
26520 /* We don't have the final size of the frame so adjust. */
26521 size
+= 32 * UNITS_PER_WORD
;
26522 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
26532 /* Only thumb1 can't support conditional execution, so return true if
26533 the target is not thumb1. */
26535 arm_have_conditional_execution (void)
26537 return !TARGET_THUMB1
;
26540 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
26541 static HOST_WIDE_INT
26542 arm_vector_alignment (const_tree type
)
26544 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
26546 if (TARGET_AAPCS_BASED
)
26547 align
= MIN (align
, 64);
26552 static unsigned int
26553 arm_autovectorize_vector_sizes (void)
26555 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
26559 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
26561 /* Vectors which aren't in packed structures will not be less aligned than
26562 the natural alignment of their element type, so this is safe. */
26563 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
26566 return default_builtin_vector_alignment_reachable (type
, is_packed
);
26570 arm_builtin_support_vector_misalignment (machine_mode mode
,
26571 const_tree type
, int misalignment
,
26574 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
26576 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
26581 /* If the misalignment is unknown, we should be able to handle the access
26582 so long as it is not to a member of a packed data structure. */
26583 if (misalignment
== -1)
26586 /* Return true if the misalignment is a multiple of the natural alignment
26587 of the vector's element type. This is probably always going to be
26588 true in practice, since we've already established that this isn't a
26590 return ((misalignment
% align
) == 0);
26593 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
26598 arm_conditional_register_usage (void)
26602 if (TARGET_THUMB1
&& optimize_size
)
26604 /* When optimizing for size on Thumb-1, it's better not
26605 to use the HI regs, because of the overhead of
26607 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
26608 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
26611 /* The link register can be clobbered by any branch insn,
26612 but we have no way to track that at present, so mark
26613 it as unavailable. */
26615 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
26617 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
26619 /* VFPv3 registers are disabled when earlier VFP
26620 versions are selected due to the definition of
26621 LAST_VFP_REGNUM. */
26622 for (regno
= FIRST_VFP_REGNUM
;
26623 regno
<= LAST_VFP_REGNUM
; ++ regno
)
26625 fixed_regs
[regno
] = 0;
26626 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
26627 || regno
>= FIRST_VFP_REGNUM
+ 32;
26631 if (TARGET_REALLY_IWMMXT
)
26633 regno
= FIRST_IWMMXT_GR_REGNUM
;
26634 /* The 2002/10/09 revision of the XScale ABI has wCG0
26635 and wCG1 as call-preserved registers. The 2002/11/21
26636 revision changed this so that all wCG registers are
26637 scratch registers. */
26638 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
26639 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
26640 fixed_regs
[regno
] = 0;
26641 /* The XScale ABI has wR0 - wR9 as scratch registers,
26642 the rest as call-preserved registers. */
26643 for (regno
= FIRST_IWMMXT_REGNUM
;
26644 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
26646 fixed_regs
[regno
] = 0;
26647 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
26651 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
26653 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
26654 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
26656 else if (TARGET_APCS_STACK
)
26658 fixed_regs
[10] = 1;
26659 call_used_regs
[10] = 1;
26661 /* -mcaller-super-interworking reserves r11 for calls to
26662 _interwork_r11_call_via_rN(). Making the register global
26663 is an easy way of ensuring that it remains valid for all
26665 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
26666 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
26668 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
26669 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
26670 if (TARGET_CALLER_INTERWORKING
)
26671 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
26673 SUBTARGET_CONDITIONAL_REGISTER_USAGE
26677 arm_preferred_rename_class (reg_class_t rclass
)
26679 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
26680 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
26681 and code size can be reduced. */
26682 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
26688 /* Compute the attribute "length" of insn "*push_multi".
26689 So this function MUST be kept in sync with that insn pattern. */
26691 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
26693 int i
, regno
, hi_reg
;
26694 int num_saves
= XVECLEN (parallel_op
, 0);
26704 regno
= REGNO (first_op
);
26705 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
26706 list is 8-bit. Normally this means all registers in the list must be
26707 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
26708 encodings. There is one exception for PUSH that LR in HI_REGS can be used
26709 with 16-bit encoding. */
26710 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
26711 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
26713 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
26714 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
26722 /* Compute the attribute "length" of insn. Currently, this function is used
26723 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
26724 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
26725 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
26726 true if OPERANDS contains insn which explicit updates base register. */
26729 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
26738 rtx parallel_op
= operands
[0];
26739 /* Initialize to elements number of PARALLEL. */
26740 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
26741 /* Initialize the value to base register. */
26742 unsigned regno
= REGNO (operands
[1]);
26743 /* Skip return and write back pattern.
26744 We only need register pop pattern for later analysis. */
26745 unsigned first_indx
= 0;
26746 first_indx
+= return_pc
? 1 : 0;
26747 first_indx
+= write_back_p
? 1 : 0;
26749 /* A pop operation can be done through LDM or POP. If the base register is SP
26750 and if it's with write back, then a LDM will be alias of POP. */
26751 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
26752 bool ldm_p
= !pop_p
;
26754 /* Check base register for LDM. */
26755 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
26758 /* Check each register in the list. */
26759 for (; indx
>= first_indx
; indx
--)
26761 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
26762 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
26763 comment in arm_attr_length_push_multi. */
26764 if (REGNO_REG_CLASS (regno
) == HI_REGS
26765 && (regno
!= PC_REGNUM
|| ldm_p
))
26772 /* Compute the number of instructions emitted by output_move_double. */
26774 arm_count_output_move_double_insns (rtx
*operands
)
26778 /* output_move_double may modify the operands array, so call it
26779 here on a copy of the array. */
26780 ops
[0] = operands
[0];
26781 ops
[1] = operands
[1];
26782 output_move_double (ops
, false, &count
);
26787 vfp3_const_double_for_fract_bits (rtx operand
)
26789 REAL_VALUE_TYPE r0
;
26791 if (!CONST_DOUBLE_P (operand
))
26794 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
26795 if (exact_real_inverse (DFmode
, &r0
)
26796 && !REAL_VALUE_NEGATIVE (r0
))
26798 if (exact_real_truncate (DFmode
, &r0
))
26800 HOST_WIDE_INT value
= real_to_integer (&r0
);
26801 value
= value
& 0xffffffff;
26802 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
26804 int ret
= exact_log2 (value
);
26805 gcc_assert (IN_RANGE (ret
, 0, 31));
26813 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
26814 log2 is in [1, 32], return that log2. Otherwise return -1.
26815 This is used in the patterns for vcvt.s32.f32 floating-point to
26816 fixed-point conversions. */
26819 vfp3_const_double_for_bits (rtx x
)
26821 const REAL_VALUE_TYPE
*r
;
26823 if (!CONST_DOUBLE_P (x
))
26826 r
= CONST_DOUBLE_REAL_VALUE (x
);
26828 if (REAL_VALUE_NEGATIVE (*r
)
26829 || REAL_VALUE_ISNAN (*r
)
26830 || REAL_VALUE_ISINF (*r
)
26831 || !real_isinteger (r
, SFmode
))
26834 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
26836 /* The exact_log2 above will have returned -1 if this is
26837 not an exact log2. */
26838 if (!IN_RANGE (hwint
, 1, 32))
26845 /* Emit a memory barrier around an atomic sequence according to MODEL. */
26848 arm_pre_atomic_barrier (enum memmodel model
)
26850 if (need_atomic_barrier_p (model
, true))
26851 emit_insn (gen_memory_barrier ());
26855 arm_post_atomic_barrier (enum memmodel model
)
26857 if (need_atomic_barrier_p (model
, false))
26858 emit_insn (gen_memory_barrier ());
26861 /* Emit the load-exclusive and store-exclusive instructions.
26862 Use acquire and release versions if necessary. */
26865 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
26867 rtx (*gen
) (rtx
, rtx
);
26873 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
26874 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
26875 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
26876 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
26878 gcc_unreachable ();
26885 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
26886 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
26887 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
26888 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
26890 gcc_unreachable ();
26894 emit_insn (gen (rval
, mem
));
26898 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
26901 rtx (*gen
) (rtx
, rtx
, rtx
);
26907 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
26908 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
26909 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
26910 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
26912 gcc_unreachable ();
26919 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
26920 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
26921 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
26922 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
26924 gcc_unreachable ();
26928 emit_insn (gen (bval
, rval
, mem
));
26931 /* Mark the previous jump instruction as unlikely. */
26934 emit_unlikely_jump (rtx insn
)
26936 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
26938 insn
= emit_jump_insn (insn
);
26939 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
26942 /* Expand a compare and swap pattern. */
26945 arm_expand_compare_and_swap (rtx operands
[])
26947 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
26949 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
26951 bval
= operands
[0];
26952 rval
= operands
[1];
26954 oldval
= operands
[3];
26955 newval
= operands
[4];
26956 is_weak
= operands
[5];
26957 mod_s
= operands
[6];
26958 mod_f
= operands
[7];
26959 mode
= GET_MODE (mem
);
26961 /* Normally the succ memory model must be stronger than fail, but in the
26962 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
26963 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
26965 if (TARGET_HAVE_LDACQ
26966 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
26967 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
26968 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
26974 /* For narrow modes, we're going to perform the comparison in SImode,
26975 so do the zero-extension now. */
26976 rval
= gen_reg_rtx (SImode
);
26977 oldval
= convert_modes (SImode
, mode
, oldval
, true);
26981 /* Force the value into a register if needed. We waited until after
26982 the zero-extension above to do this properly. */
26983 if (!arm_add_operand (oldval
, SImode
))
26984 oldval
= force_reg (SImode
, oldval
);
26988 if (!cmpdi_operand (oldval
, mode
))
26989 oldval
= force_reg (mode
, oldval
);
26993 gcc_unreachable ();
26998 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
26999 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
27000 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
27001 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
27003 gcc_unreachable ();
27006 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CCmode
, CC_REGNUM
);
27007 emit_insn (gen (bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
27009 if (mode
== QImode
|| mode
== HImode
)
27010 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
27012 /* In all cases, we arrange for success to be signaled by Z set.
27013 This arrangement allows for the boolean result to be used directly
27014 in a subsequent branch, post optimization. For Thumb-1 targets, the
27015 boolean negation of the result is also stored in bval because Thumb-1
27016 backend lacks dependency tracking for CC flag due to flag-setting not
27017 being represented at RTL level. */
27019 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
27022 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
27023 emit_insn (gen_rtx_SET (bval
, x
));
27027 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27028 another memory store between the load-exclusive and store-exclusive can
27029 reset the monitor from Exclusive to Open state. This means we must wait
27030 until after reload to split the pattern, lest we get a register spill in
27031 the middle of the atomic sequence. Success of the compare and swap is
27032 indicated by the Z flag set for 32bit targets and by neg_bval being zero
27033 for Thumb-1 targets (ie. negation of the boolean value returned by
27034 atomic_compare_and_swapmode standard pattern in operand 0). */
27037 arm_split_compare_and_swap (rtx operands
[])
27039 rtx rval
, mem
, oldval
, newval
, neg_bval
;
27041 enum memmodel mod_s
, mod_f
;
27043 rtx_code_label
*label1
, *label2
;
27046 rval
= operands
[1];
27048 oldval
= operands
[3];
27049 newval
= operands
[4];
27050 is_weak
= (operands
[5] != const0_rtx
);
27051 mod_s
= memmodel_from_int (INTVAL (operands
[6]));
27052 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
27053 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
27054 mode
= GET_MODE (mem
);
27056 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
27058 bool use_acquire
= TARGET_HAVE_LDACQ
27059 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
27060 || is_mm_release (mod_s
));
27062 bool use_release
= TARGET_HAVE_LDACQ
27063 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
27064 || is_mm_acquire (mod_s
));
27066 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27067 a full barrier is emitted after the store-release. */
27069 use_acquire
= false;
27071 /* Checks whether a barrier is needed and emits one accordingly. */
27072 if (!(use_acquire
|| use_release
))
27073 arm_pre_atomic_barrier (mod_s
);
27078 label1
= gen_label_rtx ();
27079 emit_label (label1
);
27081 label2
= gen_label_rtx ();
27083 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
27085 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
27086 as required to communicate with arm_expand_compare_and_swap. */
27089 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
27090 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27091 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
27092 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
27093 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
27097 emit_move_insn (neg_bval
, const1_rtx
);
27098 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
27099 if (thumb1_cmpneg_operand (oldval
, SImode
))
27100 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
27103 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
27106 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
27108 /* Weak or strong, we want EQ to be true for success, so that we
27109 match the flags that we got from the compare above. */
27112 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
27113 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
27114 emit_insn (gen_rtx_SET (cond
, x
));
27119 /* Z is set to boolean value of !neg_bval, as required to communicate
27120 with arm_expand_compare_and_swap. */
27121 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
27122 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
27125 if (!is_mm_relaxed (mod_f
))
27126 emit_label (label2
);
27128 /* Checks whether a barrier is needed and emits one accordingly. */
27130 || !(use_acquire
|| use_release
))
27131 arm_post_atomic_barrier (mod_s
);
27133 if (is_mm_relaxed (mod_f
))
27134 emit_label (label2
);
27137 /* Split an atomic operation pattern. Operation is given by CODE and is one
27138 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
27139 operation). Operation is performed on the content at MEM and on VALUE
27140 following the memory model MODEL_RTX. The content at MEM before and after
27141 the operation is returned in OLD_OUT and NEW_OUT respectively while the
27142 success of the operation is returned in COND. Using a scratch register or
27143 an operand register for these determines what result is returned for that
27147 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
27148 rtx value
, rtx model_rtx
, rtx cond
)
27150 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
27151 machine_mode mode
= GET_MODE (mem
);
27152 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
27153 rtx_code_label
*label
;
27154 bool all_low_regs
, bind_old_new
;
27157 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
27159 bool use_acquire
= TARGET_HAVE_LDACQ
27160 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
27161 || is_mm_release (model
));
27163 bool use_release
= TARGET_HAVE_LDACQ
27164 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
27165 || is_mm_acquire (model
));
27167 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
27168 a full barrier is emitted after the store-release. */
27170 use_acquire
= false;
27172 /* Checks whether a barrier is needed and emits one accordingly. */
27173 if (!(use_acquire
|| use_release
))
27174 arm_pre_atomic_barrier (model
);
27176 label
= gen_label_rtx ();
27177 emit_label (label
);
27180 new_out
= gen_lowpart (wmode
, new_out
);
27182 old_out
= gen_lowpart (wmode
, old_out
);
27185 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
27187 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
27189 /* Does the operation require destination and first operand to use the same
27190 register? This is decided by register constraints of relevant insn
27191 patterns in thumb1.md. */
27192 gcc_assert (!new_out
|| REG_P (new_out
));
27193 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
27194 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
27195 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
27200 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
27202 /* We want to return the old value while putting the result of the operation
27203 in the same register as the old value so copy the old value over to the
27204 destination register and use that register for the operation. */
27205 if (old_out
&& bind_old_new
)
27207 emit_move_insn (new_out
, old_out
);
27218 x
= gen_rtx_AND (wmode
, old_out
, value
);
27219 emit_insn (gen_rtx_SET (new_out
, x
));
27220 x
= gen_rtx_NOT (wmode
, new_out
);
27221 emit_insn (gen_rtx_SET (new_out
, x
));
27225 if (CONST_INT_P (value
))
27227 value
= GEN_INT (-INTVAL (value
));
27233 if (mode
== DImode
)
27235 /* DImode plus/minus need to clobber flags. */
27236 /* The adddi3 and subdi3 patterns are incorrectly written so that
27237 they require matching operands, even when we could easily support
27238 three operands. Thankfully, this can be fixed up post-splitting,
27239 as the individual add+adc patterns do accept three operands and
27240 post-reload cprop can make these moves go away. */
27241 emit_move_insn (new_out
, old_out
);
27243 x
= gen_adddi3 (new_out
, new_out
, value
);
27245 x
= gen_subdi3 (new_out
, new_out
, value
);
27252 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
27253 emit_insn (gen_rtx_SET (new_out
, x
));
27257 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
27260 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
27261 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
27263 /* Checks whether a barrier is needed and emits one accordingly. */
27265 || !(use_acquire
|| use_release
))
27266 arm_post_atomic_barrier (model
);
27269 #define MAX_VECT_LEN 16
27271 struct expand_vec_perm_d
27273 rtx target
, op0
, op1
;
27274 unsigned char perm
[MAX_VECT_LEN
];
27275 machine_mode vmode
;
27276 unsigned char nelt
;
27281 /* Generate a variable permutation. */
27284 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
27286 machine_mode vmode
= GET_MODE (target
);
27287 bool one_vector_p
= rtx_equal_p (op0
, op1
);
27289 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
27290 gcc_checking_assert (GET_MODE (op0
) == vmode
);
27291 gcc_checking_assert (GET_MODE (op1
) == vmode
);
27292 gcc_checking_assert (GET_MODE (sel
) == vmode
);
27293 gcc_checking_assert (TARGET_NEON
);
27297 if (vmode
== V8QImode
)
27298 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
27300 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
27306 if (vmode
== V8QImode
)
27308 pair
= gen_reg_rtx (V16QImode
);
27309 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
27310 pair
= gen_lowpart (TImode
, pair
);
27311 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
27315 pair
= gen_reg_rtx (OImode
);
27316 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
27317 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
27323 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
27325 machine_mode vmode
= GET_MODE (target
);
27326 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
27327 bool one_vector_p
= rtx_equal_p (op0
, op1
);
27328 rtx rmask
[MAX_VECT_LEN
], mask
;
27330 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27331 numbering of elements for big-endian, we must reverse the order. */
27332 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
27334 /* The VTBL instruction does not use a modulo index, so we must take care
27335 of that ourselves. */
27336 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27337 for (i
= 0; i
< nelt
; ++i
)
27339 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
27340 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
27342 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
27345 /* Map lane ordering between architectural lane order, and GCC lane order,
27346 taking into account ABI. See comment above output_move_neon for details. */
27349 neon_endian_lane_map (machine_mode mode
, int lane
)
27351 if (BYTES_BIG_ENDIAN
)
27353 int nelems
= GET_MODE_NUNITS (mode
);
27354 /* Reverse lane order. */
27355 lane
= (nelems
- 1 - lane
);
27356 /* Reverse D register order, to match ABI. */
27357 if (GET_MODE_SIZE (mode
) == 16)
27358 lane
= lane
^ (nelems
/ 2);
27363 /* Some permutations index into pairs of vectors, this is a helper function
27364 to map indexes into those pairs of vectors. */
27367 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
27369 int nelem
= GET_MODE_NUNITS (mode
);
27370 if (BYTES_BIG_ENDIAN
)
27372 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
27376 /* Generate or test for an insn that supports a constant permutation. */
27378 /* Recognize patterns for the VUZP insns. */
27381 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
27383 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
27384 rtx out0
, out1
, in0
, in1
;
27385 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
27389 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
27392 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
27393 big endian pattern on 64 bit vectors, so we correct for that. */
27394 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
27395 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
27397 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
27399 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
27401 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
27405 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27407 for (i
= 0; i
< nelt
; i
++)
27410 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
27411 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
27421 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
27422 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
27423 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
27424 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
27425 case V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
27426 case V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
27427 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
27428 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
27429 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
27430 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
27432 gcc_unreachable ();
27437 if (swap_nelt
!= 0)
27438 std::swap (in0
, in1
);
27441 out1
= gen_reg_rtx (d
->vmode
);
27443 std::swap (out0
, out1
);
27445 emit_insn (gen (out0
, in0
, in1
, out1
));
27449 /* Recognize patterns for the VZIP insns. */
27452 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
27454 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
27455 rtx out0
, out1
, in0
, in1
;
27456 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
27460 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
27463 is_swapped
= BYTES_BIG_ENDIAN
;
27465 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
27468 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
27470 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
27474 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27476 for (i
= 0; i
< nelt
/ 2; i
++)
27479 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
27480 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
27484 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
27485 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
27496 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
27497 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
27498 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
27499 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
27500 case V8HFmode
: gen
= gen_neon_vzipv8hf_internal
; break;
27501 case V4HFmode
: gen
= gen_neon_vzipv4hf_internal
; break;
27502 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
27503 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
27504 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
27505 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
27507 gcc_unreachable ();
27513 std::swap (in0
, in1
);
27516 out1
= gen_reg_rtx (d
->vmode
);
27518 std::swap (out0
, out1
);
27520 emit_insn (gen (out0
, in0
, in1
, out1
));
27524 /* Recognize patterns for the VREV insns. */
27527 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
27529 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
27530 rtx (*gen
)(rtx
, rtx
);
27532 if (!d
->one_vector_p
)
27541 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
27542 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
27550 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
27551 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
27552 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
27553 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
27554 case V8HFmode
: gen
= gen_neon_vrev64v8hf
; break;
27555 case V4HFmode
: gen
= gen_neon_vrev64v4hf
; break;
27563 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
27564 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
27565 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
27566 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
27567 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
27568 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
27569 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
27570 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
27579 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
27580 for (j
= 0; j
<= diff
; j
+= 1)
27582 /* This is guaranteed to be true as the value of diff
27583 is 7, 3, 1 and we should have enough elements in the
27584 queue to generate this. Getting a vector mask with a
27585 value of diff other than these values implies that
27586 something is wrong by the time we get here. */
27587 gcc_assert (i
+ j
< nelt
);
27588 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
27596 emit_insn (gen (d
->target
, d
->op0
));
27600 /* Recognize patterns for the VTRN insns. */
27603 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
27605 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
27606 rtx out0
, out1
, in0
, in1
;
27607 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
27609 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
27612 /* Note that these are little-endian tests. Adjust for big-endian later. */
27613 if (d
->perm
[0] == 0)
27615 else if (d
->perm
[0] == 1)
27619 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
27621 for (i
= 0; i
< nelt
; i
+= 2)
27623 if (d
->perm
[i
] != i
+ odd
)
27625 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
27635 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
27636 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
27637 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
27638 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
27639 case V8HFmode
: gen
= gen_neon_vtrnv8hf_internal
; break;
27640 case V4HFmode
: gen
= gen_neon_vtrnv4hf_internal
; break;
27641 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
27642 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
27643 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
27644 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
27646 gcc_unreachable ();
27651 if (BYTES_BIG_ENDIAN
)
27653 std::swap (in0
, in1
);
27658 out1
= gen_reg_rtx (d
->vmode
);
27660 std::swap (out0
, out1
);
27662 emit_insn (gen (out0
, in0
, in1
, out1
));
27666 /* Recognize patterns for the VEXT insns. */
27669 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
27671 unsigned int i
, nelt
= d
->nelt
;
27672 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
27675 unsigned int location
;
27677 unsigned int next
= d
->perm
[0] + 1;
27679 /* TODO: Handle GCC's numbering of elements for big-endian. */
27680 if (BYTES_BIG_ENDIAN
)
27683 /* Check if the extracted indexes are increasing by one. */
27684 for (i
= 1; i
< nelt
; next
++, i
++)
27686 /* If we hit the most significant element of the 2nd vector in
27687 the previous iteration, no need to test further. */
27688 if (next
== 2 * nelt
)
27691 /* If we are operating on only one vector: it could be a
27692 rotation. If there are only two elements of size < 64, let
27693 arm_evpc_neon_vrev catch it. */
27694 if (d
->one_vector_p
&& (next
== nelt
))
27696 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
27702 if (d
->perm
[i
] != next
)
27706 location
= d
->perm
[0];
27710 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
27711 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
27712 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
27713 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
27714 case V2SImode
: gen
= gen_neon_vextv2si
; break;
27715 case V4SImode
: gen
= gen_neon_vextv4si
; break;
27716 case V4HFmode
: gen
= gen_neon_vextv4hf
; break;
27717 case V8HFmode
: gen
= gen_neon_vextv8hf
; break;
27718 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
27719 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
27720 case V2DImode
: gen
= gen_neon_vextv2di
; break;
27729 offset
= GEN_INT (location
);
27730 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
27734 /* The NEON VTBL instruction is a fully variable permuation that's even
27735 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
27736 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
27737 can do slightly better by expanding this as a constant where we don't
27738 have to apply a mask. */
27741 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
27743 rtx rperm
[MAX_VECT_LEN
], sel
;
27744 machine_mode vmode
= d
->vmode
;
27745 unsigned int i
, nelt
= d
->nelt
;
27747 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27748 numbering of elements for big-endian, we must reverse the order. */
27749 if (BYTES_BIG_ENDIAN
)
27755 /* Generic code will try constant permutation twice. Once with the
27756 original mode and again with the elements lowered to QImode.
27757 So wait and don't do the selector expansion ourselves. */
27758 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
27761 for (i
= 0; i
< nelt
; ++i
)
27762 rperm
[i
] = GEN_INT (d
->perm
[i
]);
27763 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
27764 sel
= force_reg (vmode
, sel
);
27766 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
27771 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
27773 /* Check if the input mask matches vext before reordering the
27776 if (arm_evpc_neon_vext (d
))
27779 /* The pattern matching functions above are written to look for a small
27780 number to begin the sequence (0, 1, N/2). If we begin with an index
27781 from the second operand, we can swap the operands. */
27782 if (d
->perm
[0] >= d
->nelt
)
27784 unsigned i
, nelt
= d
->nelt
;
27786 for (i
= 0; i
< nelt
; ++i
)
27787 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
27789 std::swap (d
->op0
, d
->op1
);
27794 if (arm_evpc_neon_vuzp (d
))
27796 if (arm_evpc_neon_vzip (d
))
27798 if (arm_evpc_neon_vrev (d
))
27800 if (arm_evpc_neon_vtrn (d
))
27802 return arm_evpc_neon_vtbl (d
);
27807 /* Expand a vec_perm_const pattern. */
27810 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
27812 struct expand_vec_perm_d d
;
27813 int i
, nelt
, which
;
27819 d
.vmode
= GET_MODE (target
);
27820 gcc_assert (VECTOR_MODE_P (d
.vmode
));
27821 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
27822 d
.testing_p
= false;
27824 for (i
= which
= 0; i
< nelt
; ++i
)
27826 rtx e
= XVECEXP (sel
, 0, i
);
27827 int ei
= INTVAL (e
) & (2 * nelt
- 1);
27828 which
|= (ei
< nelt
? 1 : 2);
27838 d
.one_vector_p
= false;
27839 if (!rtx_equal_p (op0
, op1
))
27842 /* The elements of PERM do not suggest that only the first operand
27843 is used, but both operands are identical. Allow easier matching
27844 of the permutation by folding the permutation into the single
27848 for (i
= 0; i
< nelt
; ++i
)
27849 d
.perm
[i
] &= nelt
- 1;
27851 d
.one_vector_p
= true;
27856 d
.one_vector_p
= true;
27860 return arm_expand_vec_perm_const_1 (&d
);
27863 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
27866 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
27867 const unsigned char *sel
)
27869 struct expand_vec_perm_d d
;
27870 unsigned int i
, nelt
, which
;
27874 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
27875 d
.testing_p
= true;
27876 memcpy (d
.perm
, sel
, nelt
);
27878 /* Categorize the set of elements in the selector. */
27879 for (i
= which
= 0; i
< nelt
; ++i
)
27881 unsigned char e
= d
.perm
[i
];
27882 gcc_assert (e
< 2 * nelt
);
27883 which
|= (e
< nelt
? 1 : 2);
27886 /* For all elements from second vector, fold the elements to first. */
27888 for (i
= 0; i
< nelt
; ++i
)
27891 /* Check whether the mask can be applied to the vector type. */
27892 d
.one_vector_p
= (which
!= 3);
27894 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
27895 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
27896 if (!d
.one_vector_p
)
27897 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
27900 ret
= arm_expand_vec_perm_const_1 (&d
);
27907 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
27909 /* If we are soft float and we do not have ldrd
27910 then all auto increment forms are ok. */
27911 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
27916 /* Post increment and Pre Decrement are supported for all
27917 instruction forms except for vector forms. */
27920 if (VECTOR_MODE_P (mode
))
27922 if (code
!= ARM_PRE_DEC
)
27932 /* Without LDRD and mode size greater than
27933 word size, there is no point in auto-incrementing
27934 because ldm and stm will not have these forms. */
27935 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
27938 /* Vector and floating point modes do not support
27939 these auto increment forms. */
27940 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
27953 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
27954 on ARM, since we know that shifts by negative amounts are no-ops.
27955 Additionally, the default expansion code is not available or suitable
27956 for post-reload insn splits (this can occur when the register allocator
27957 chooses not to do a shift in NEON).
27959 This function is used in both initial expand and post-reload splits, and
27960 handles all kinds of 64-bit shifts.
27962 Input requirements:
27963 - It is safe for the input and output to be the same register, but
27964 early-clobber rules apply for the shift amount and scratch registers.
27965 - Shift by register requires both scratch registers. In all other cases
27966 the scratch registers may be NULL.
27967 - Ashiftrt by a register also clobbers the CC register. */
27969 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
27970 rtx amount
, rtx scratch1
, rtx scratch2
)
27972 rtx out_high
= gen_highpart (SImode
, out
);
27973 rtx out_low
= gen_lowpart (SImode
, out
);
27974 rtx in_high
= gen_highpart (SImode
, in
);
27975 rtx in_low
= gen_lowpart (SImode
, in
);
27978 in = the register pair containing the input value.
27979 out = the destination register pair.
27980 up = the high- or low-part of each pair.
27981 down = the opposite part to "up".
27982 In a shift, we can consider bits to shift from "up"-stream to
27983 "down"-stream, so in a left-shift "up" is the low-part and "down"
27984 is the high-part of each register pair. */
27986 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
27987 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
27988 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
27989 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
27991 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
27993 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
27994 && GET_MODE (out
) == DImode
);
27996 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
27997 && GET_MODE (in
) == DImode
);
27999 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
28000 && GET_MODE (amount
) == SImode
)
28001 || CONST_INT_P (amount
)));
28002 gcc_assert (scratch1
== NULL
28003 || (GET_CODE (scratch1
) == SCRATCH
)
28004 || (GET_MODE (scratch1
) == SImode
28005 && REG_P (scratch1
)));
28006 gcc_assert (scratch2
== NULL
28007 || (GET_CODE (scratch2
) == SCRATCH
)
28008 || (GET_MODE (scratch2
) == SImode
28009 && REG_P (scratch2
)));
28010 gcc_assert (!REG_P (out
) || !REG_P (amount
)
28011 || !HARD_REGISTER_P (out
)
28012 || (REGNO (out
) != REGNO (amount
)
28013 && REGNO (out
) + 1 != REGNO (amount
)));
28015 /* Macros to make following code more readable. */
28016 #define SUB_32(DEST,SRC) \
28017 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28018 #define RSB_32(DEST,SRC) \
28019 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28020 #define SUB_S_32(DEST,SRC) \
28021 gen_addsi3_compare0 ((DEST), (SRC), \
28023 #define SET(DEST,SRC) \
28024 gen_rtx_SET ((DEST), (SRC))
28025 #define SHIFT(CODE,SRC,AMOUNT) \
28026 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28027 #define LSHIFT(CODE,SRC,AMOUNT) \
28028 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28029 SImode, (SRC), (AMOUNT))
28030 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28031 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28032 SImode, (SRC), (AMOUNT))
28034 gen_rtx_IOR (SImode, (A), (B))
28035 #define BRANCH(COND,LABEL) \
28036 gen_arm_cond_branch ((LABEL), \
28037 gen_rtx_ ## COND (CCmode, cc_reg, \
28041 /* Shifts by register and shifts by constant are handled separately. */
28042 if (CONST_INT_P (amount
))
28044 /* We have a shift-by-constant. */
28046 /* First, handle out-of-range shift amounts.
28047 In both cases we try to match the result an ARM instruction in a
28048 shift-by-register would give. This helps reduce execution
28049 differences between optimization levels, but it won't stop other
28050 parts of the compiler doing different things. This is "undefined
28051 behavior, in any case. */
28052 if (INTVAL (amount
) <= 0)
28053 emit_insn (gen_movdi (out
, in
));
28054 else if (INTVAL (amount
) >= 64)
28056 if (code
== ASHIFTRT
)
28058 rtx const31_rtx
= GEN_INT (31);
28059 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
28060 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
28063 emit_insn (gen_movdi (out
, const0_rtx
));
28066 /* Now handle valid shifts. */
28067 else if (INTVAL (amount
) < 32)
28069 /* Shifts by a constant less than 32. */
28070 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
28072 /* Clearing the out register in DImode first avoids lots
28073 of spilling and results in less stack usage.
28074 Later this redundant insn is completely removed.
28075 Do that only if "in" and "out" are different registers. */
28076 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
28077 emit_insn (SET (out
, const0_rtx
));
28078 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28079 emit_insn (SET (out_down
,
28080 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
28082 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
28086 /* Shifts by a constant greater than 31. */
28087 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
28089 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
28090 emit_insn (SET (out
, const0_rtx
));
28091 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
28092 if (code
== ASHIFTRT
)
28093 emit_insn (gen_ashrsi3 (out_up
, in_up
,
28096 emit_insn (SET (out_up
, const0_rtx
));
28101 /* We have a shift-by-register. */
28102 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
28104 /* This alternative requires the scratch registers. */
28105 gcc_assert (scratch1
&& REG_P (scratch1
));
28106 gcc_assert (scratch2
&& REG_P (scratch2
));
28108 /* We will need the values "amount-32" and "32-amount" later.
28109 Swapping them around now allows the later code to be more general. */
28113 emit_insn (SUB_32 (scratch1
, amount
));
28114 emit_insn (RSB_32 (scratch2
, amount
));
28117 emit_insn (RSB_32 (scratch1
, amount
));
28118 /* Also set CC = amount > 32. */
28119 emit_insn (SUB_S_32 (scratch2
, amount
));
28122 emit_insn (RSB_32 (scratch1
, amount
));
28123 emit_insn (SUB_32 (scratch2
, amount
));
28126 gcc_unreachable ();
28129 /* Emit code like this:
28132 out_down = in_down << amount;
28133 out_down = (in_up << (amount - 32)) | out_down;
28134 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28135 out_up = in_up << amount;
28138 out_down = in_down >> amount;
28139 out_down = (in_up << (32 - amount)) | out_down;
28141 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28142 out_up = in_up << amount;
28145 out_down = in_down >> amount;
28146 out_down = (in_up << (32 - amount)) | out_down;
28148 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28149 out_up = in_up << amount;
28151 The ARM and Thumb2 variants are the same but implemented slightly
28152 differently. If this were only called during expand we could just
28153 use the Thumb2 case and let combine do the right thing, but this
28154 can also be called from post-reload splitters. */
28156 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
28158 if (!TARGET_THUMB2
)
28160 /* Emit code for ARM mode. */
28161 emit_insn (SET (out_down
,
28162 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
28163 if (code
== ASHIFTRT
)
28165 rtx_code_label
*done_label
= gen_label_rtx ();
28166 emit_jump_insn (BRANCH (LT
, done_label
));
28167 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
28169 emit_label (done_label
);
28172 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
28177 /* Emit code for Thumb2 mode.
28178 Thumb2 can't do shift and or in one insn. */
28179 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
28180 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
28182 if (code
== ASHIFTRT
)
28184 rtx_code_label
*done_label
= gen_label_rtx ();
28185 emit_jump_insn (BRANCH (LT
, done_label
));
28186 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
28187 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
28188 emit_label (done_label
);
28192 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
28193 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
28197 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
28211 /* Returns true if the pattern is a valid symbolic address, which is either a
28212 symbol_ref or (symbol_ref + addend).
28214 According to the ARM ELF ABI, the initial addend of REL-type relocations
28215 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
28216 literal field of the instruction as a 16-bit signed value in the range
28217 -32768 <= A < 32768. */
28220 arm_valid_symbolic_address_p (rtx addr
)
28222 rtx xop0
, xop1
= NULL_RTX
;
28225 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
28228 /* (const (plus: symbol_ref const_int)) */
28229 if (GET_CODE (addr
) == CONST
)
28230 tmp
= XEXP (addr
, 0);
28232 if (GET_CODE (tmp
) == PLUS
)
28234 xop0
= XEXP (tmp
, 0);
28235 xop1
= XEXP (tmp
, 1);
28237 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
28238 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
28244 /* Returns true if a valid comparison operation and makes
28245 the operands in a form that is valid. */
28247 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
28249 enum rtx_code code
= GET_CODE (*comparison
);
28251 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
28252 ? GET_MODE (*op2
) : GET_MODE (*op1
);
28254 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
28256 if (code
== UNEQ
|| code
== LTGT
)
28259 code_int
= (int)code
;
28260 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
28261 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
28266 if (!arm_add_operand (*op1
, mode
))
28267 *op1
= force_reg (mode
, *op1
);
28268 if (!arm_add_operand (*op2
, mode
))
28269 *op2
= force_reg (mode
, *op2
);
28273 if (!cmpdi_operand (*op1
, mode
))
28274 *op1
= force_reg (mode
, *op1
);
28275 if (!cmpdi_operand (*op2
, mode
))
28276 *op2
= force_reg (mode
, *op2
);
28280 if (!TARGET_VFP_FP16INST
)
28282 /* FP16 comparisons are done in SF mode. */
28284 *op1
= convert_to_mode (mode
, *op1
, 1);
28285 *op2
= convert_to_mode (mode
, *op2
, 1);
28286 /* Fall through. */
28289 if (!vfp_compare_operand (*op1
, mode
))
28290 *op1
= force_reg (mode
, *op1
);
28291 if (!vfp_compare_operand (*op2
, mode
))
28292 *op2
= force_reg (mode
, *op2
);
28302 /* Maximum number of instructions to set block of memory. */
28304 arm_block_set_max_insns (void)
28306 if (optimize_function_for_size_p (cfun
))
28309 return current_tune
->max_insns_inline_memset
;
28312 /* Return TRUE if it's profitable to set block of memory for
28313 non-vectorized case. VAL is the value to set the memory
28314 with. LENGTH is the number of bytes to set. ALIGN is the
28315 alignment of the destination memory in bytes. UNALIGNED_P
28316 is TRUE if we can only set the memory with instructions
28317 meeting alignment requirements. USE_STRD_P is TRUE if we
28318 can use strd to set the memory. */
28320 arm_block_set_non_vect_profit_p (rtx val
,
28321 unsigned HOST_WIDE_INT length
,
28322 unsigned HOST_WIDE_INT align
,
28323 bool unaligned_p
, bool use_strd_p
)
28326 /* For leftovers in bytes of 0-7, we can set the memory block using
28327 strb/strh/str with minimum instruction number. */
28328 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28332 num
= arm_const_inline_cost (SET
, val
);
28333 num
+= length
/ align
+ length
% align
;
28335 else if (use_strd_p
)
28337 num
= arm_const_double_inline_cost (val
);
28338 num
+= (length
>> 3) + leftover
[length
& 7];
28342 num
= arm_const_inline_cost (SET
, val
);
28343 num
+= (length
>> 2) + leftover
[length
& 3];
28346 /* We may be able to combine last pair STRH/STRB into a single STR
28347 by shifting one byte back. */
28348 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
28351 return (num
<= arm_block_set_max_insns ());
28354 /* Return TRUE if it's profitable to set block of memory for
28355 vectorized case. LENGTH is the number of bytes to set.
28356 ALIGN is the alignment of destination memory in bytes.
28357 MODE is the vector mode used to set the memory. */
28359 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
28360 unsigned HOST_WIDE_INT align
,
28364 bool unaligned_p
= ((align
& 3) != 0);
28365 unsigned int nelt
= GET_MODE_NUNITS (mode
);
28367 /* Instruction loading constant value. */
28369 /* Instructions storing the memory. */
28370 num
+= (length
+ nelt
- 1) / nelt
;
28371 /* Instructions adjusting the address expression. Only need to
28372 adjust address expression if it's 4 bytes aligned and bytes
28373 leftover can only be stored by mis-aligned store instruction. */
28374 if (!unaligned_p
&& (length
& 3) != 0)
28377 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28378 if (!unaligned_p
&& mode
== V16QImode
)
28381 return (num
<= arm_block_set_max_insns ());
28384 /* Set a block of memory using vectorization instructions for the
28385 unaligned case. We fill the first LENGTH bytes of the memory
28386 area starting from DSTBASE with byte constant VALUE. ALIGN is
28387 the alignment requirement of memory. Return TRUE if succeeded. */
28389 arm_block_set_unaligned_vect (rtx dstbase
,
28390 unsigned HOST_WIDE_INT length
,
28391 unsigned HOST_WIDE_INT value
,
28392 unsigned HOST_WIDE_INT align
)
28394 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
28396 rtx val_elt
, val_vec
, reg
;
28397 rtx rval
[MAX_VECT_LEN
];
28398 rtx (*gen_func
) (rtx
, rtx
);
28400 unsigned HOST_WIDE_INT v
= value
;
28401 unsigned int offset
= 0;
28402 gcc_assert ((align
& 0x3) != 0);
28403 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
28404 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
28405 if (length
>= nelt_v16
)
28408 gen_func
= gen_movmisalignv16qi
;
28413 gen_func
= gen_movmisalignv8qi
;
28415 nelt_mode
= GET_MODE_NUNITS (mode
);
28416 gcc_assert (length
>= nelt_mode
);
28417 /* Skip if it isn't profitable. */
28418 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
28421 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
28422 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28424 v
= sext_hwi (v
, BITS_PER_WORD
);
28425 val_elt
= GEN_INT (v
);
28426 for (j
= 0; j
< nelt_mode
; j
++)
28429 reg
= gen_reg_rtx (mode
);
28430 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
28431 /* Emit instruction loading the constant value. */
28432 emit_move_insn (reg
, val_vec
);
28434 /* Handle nelt_mode bytes in a vector. */
28435 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
28437 emit_insn ((*gen_func
) (mem
, reg
));
28438 if (i
+ 2 * nelt_mode
<= length
)
28440 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
28441 offset
+= nelt_mode
;
28442 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28446 /* If there are not less than nelt_v8 bytes leftover, we must be in
28448 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
28450 /* Handle (8, 16) bytes leftover. */
28451 if (i
+ nelt_v8
< length
)
28453 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
28454 offset
+= length
- i
;
28455 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28457 /* We are shifting bytes back, set the alignment accordingly. */
28458 if ((length
& 1) != 0 && align
>= 2)
28459 set_mem_align (mem
, BITS_PER_UNIT
);
28461 emit_insn (gen_movmisalignv16qi (mem
, reg
));
28463 /* Handle (0, 8] bytes leftover. */
28464 else if (i
< length
&& i
+ nelt_v8
>= length
)
28466 if (mode
== V16QImode
)
28467 reg
= gen_lowpart (V8QImode
, reg
);
28469 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
28470 + (nelt_mode
- nelt_v8
))));
28471 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
28472 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
28474 /* We are shifting bytes back, set the alignment accordingly. */
28475 if ((length
& 1) != 0 && align
>= 2)
28476 set_mem_align (mem
, BITS_PER_UNIT
);
28478 emit_insn (gen_movmisalignv8qi (mem
, reg
));
28484 /* Set a block of memory using vectorization instructions for the
28485 aligned case. We fill the first LENGTH bytes of the memory area
28486 starting from DSTBASE with byte constant VALUE. ALIGN is the
28487 alignment requirement of memory. Return TRUE if succeeded. */
28489 arm_block_set_aligned_vect (rtx dstbase
,
28490 unsigned HOST_WIDE_INT length
,
28491 unsigned HOST_WIDE_INT value
,
28492 unsigned HOST_WIDE_INT align
)
28494 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
28495 rtx dst
, addr
, mem
;
28496 rtx val_elt
, val_vec
, reg
;
28497 rtx rval
[MAX_VECT_LEN
];
28499 unsigned HOST_WIDE_INT v
= value
;
28500 unsigned int offset
= 0;
28502 gcc_assert ((align
& 0x3) == 0);
28503 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
28504 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
28505 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
28510 nelt_mode
= GET_MODE_NUNITS (mode
);
28511 gcc_assert (length
>= nelt_mode
);
28512 /* Skip if it isn't profitable. */
28513 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
28516 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
28518 v
= sext_hwi (v
, BITS_PER_WORD
);
28519 val_elt
= GEN_INT (v
);
28520 for (j
= 0; j
< nelt_mode
; j
++)
28523 reg
= gen_reg_rtx (mode
);
28524 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
28525 /* Emit instruction loading the constant value. */
28526 emit_move_insn (reg
, val_vec
);
28529 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28530 if (mode
== V16QImode
)
28532 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28533 emit_insn (gen_movmisalignv16qi (mem
, reg
));
28535 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28536 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
28538 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
28539 offset
+= length
- nelt_mode
;
28540 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28541 /* We are shifting bytes back, set the alignment accordingly. */
28542 if ((length
& 0x3) == 0)
28543 set_mem_align (mem
, BITS_PER_UNIT
* 4);
28544 else if ((length
& 0x1) == 0)
28545 set_mem_align (mem
, BITS_PER_UNIT
* 2);
28547 set_mem_align (mem
, BITS_PER_UNIT
);
28549 emit_insn (gen_movmisalignv16qi (mem
, reg
));
28552 /* Fall through for bytes leftover. */
28554 nelt_mode
= GET_MODE_NUNITS (mode
);
28555 reg
= gen_lowpart (V8QImode
, reg
);
28558 /* Handle 8 bytes in a vector. */
28559 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
28561 addr
= plus_constant (Pmode
, dst
, i
);
28562 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
28563 emit_move_insn (mem
, reg
);
28566 /* Handle single word leftover by shifting 4 bytes back. We can
28567 use aligned access for this case. */
28568 if (i
+ UNITS_PER_WORD
== length
)
28570 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
28571 offset
+= i
- UNITS_PER_WORD
;
28572 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
28573 /* We are shifting 4 bytes back, set the alignment accordingly. */
28574 if (align
> UNITS_PER_WORD
)
28575 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
28577 emit_move_insn (mem
, reg
);
28579 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28580 We have to use unaligned access for this case. */
28581 else if (i
< length
)
28583 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
28584 offset
+= length
- nelt_mode
;
28585 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
28586 /* We are shifting bytes back, set the alignment accordingly. */
28587 if ((length
& 1) == 0)
28588 set_mem_align (mem
, BITS_PER_UNIT
* 2);
28590 set_mem_align (mem
, BITS_PER_UNIT
);
28592 emit_insn (gen_movmisalignv8qi (mem
, reg
));
28598 /* Set a block of memory using plain strh/strb instructions, only
28599 using instructions allowed by ALIGN on processor. We fill the
28600 first LENGTH bytes of the memory area starting from DSTBASE
28601 with byte constant VALUE. ALIGN is the alignment requirement
28604 arm_block_set_unaligned_non_vect (rtx dstbase
,
28605 unsigned HOST_WIDE_INT length
,
28606 unsigned HOST_WIDE_INT value
,
28607 unsigned HOST_WIDE_INT align
)
28610 rtx dst
, addr
, mem
;
28611 rtx val_exp
, val_reg
, reg
;
28613 HOST_WIDE_INT v
= value
;
28615 gcc_assert (align
== 1 || align
== 2);
28618 v
|= (value
<< BITS_PER_UNIT
);
28620 v
= sext_hwi (v
, BITS_PER_WORD
);
28621 val_exp
= GEN_INT (v
);
28622 /* Skip if it isn't profitable. */
28623 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
28624 align
, true, false))
28627 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
28628 mode
= (align
== 2 ? HImode
: QImode
);
28629 val_reg
= force_reg (SImode
, val_exp
);
28630 reg
= gen_lowpart (mode
, val_reg
);
28632 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
28634 addr
= plus_constant (Pmode
, dst
, i
);
28635 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
28636 emit_move_insn (mem
, reg
);
28639 /* Handle single byte leftover. */
28640 if (i
+ 1 == length
)
28642 reg
= gen_lowpart (QImode
, val_reg
);
28643 addr
= plus_constant (Pmode
, dst
, i
);
28644 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
28645 emit_move_insn (mem
, reg
);
28649 gcc_assert (i
== length
);
28653 /* Set a block of memory using plain strd/str/strh/strb instructions,
28654 to permit unaligned copies on processors which support unaligned
28655 semantics for those instructions. We fill the first LENGTH bytes
28656 of the memory area starting from DSTBASE with byte constant VALUE.
28657 ALIGN is the alignment requirement of memory. */
28659 arm_block_set_aligned_non_vect (rtx dstbase
,
28660 unsigned HOST_WIDE_INT length
,
28661 unsigned HOST_WIDE_INT value
,
28662 unsigned HOST_WIDE_INT align
)
28665 rtx dst
, addr
, mem
;
28666 rtx val_exp
, val_reg
, reg
;
28667 unsigned HOST_WIDE_INT v
;
28670 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
28671 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
28673 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
28674 if (length
< UNITS_PER_WORD
)
28675 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
28678 v
|= (v
<< BITS_PER_WORD
);
28680 v
= sext_hwi (v
, BITS_PER_WORD
);
28682 val_exp
= GEN_INT (v
);
28683 /* Skip if it isn't profitable. */
28684 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
28685 align
, false, use_strd_p
))
28690 /* Try without strd. */
28691 v
= (v
>> BITS_PER_WORD
);
28692 v
= sext_hwi (v
, BITS_PER_WORD
);
28693 val_exp
= GEN_INT (v
);
28694 use_strd_p
= false;
28695 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
28696 align
, false, use_strd_p
))
28701 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
28702 /* Handle double words using strd if possible. */
28705 val_reg
= force_reg (DImode
, val_exp
);
28707 for (; (i
+ 8 <= length
); i
+= 8)
28709 addr
= plus_constant (Pmode
, dst
, i
);
28710 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
28711 emit_move_insn (mem
, reg
);
28715 val_reg
= force_reg (SImode
, val_exp
);
28717 /* Handle words. */
28718 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
28719 for (; (i
+ 4 <= length
); i
+= 4)
28721 addr
= plus_constant (Pmode
, dst
, i
);
28722 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
28723 if ((align
& 3) == 0)
28724 emit_move_insn (mem
, reg
);
28726 emit_insn (gen_unaligned_storesi (mem
, reg
));
28729 /* Merge last pair of STRH and STRB into a STR if possible. */
28730 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
28732 addr
= plus_constant (Pmode
, dst
, i
- 1);
28733 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
28734 /* We are shifting one byte back, set the alignment accordingly. */
28735 if ((align
& 1) == 0)
28736 set_mem_align (mem
, BITS_PER_UNIT
);
28738 /* Most likely this is an unaligned access, and we can't tell at
28739 compilation time. */
28740 emit_insn (gen_unaligned_storesi (mem
, reg
));
28744 /* Handle half word leftover. */
28745 if (i
+ 2 <= length
)
28747 reg
= gen_lowpart (HImode
, val_reg
);
28748 addr
= plus_constant (Pmode
, dst
, i
);
28749 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
28750 if ((align
& 1) == 0)
28751 emit_move_insn (mem
, reg
);
28753 emit_insn (gen_unaligned_storehi (mem
, reg
));
28758 /* Handle single byte leftover. */
28759 if (i
+ 1 == length
)
28761 reg
= gen_lowpart (QImode
, val_reg
);
28762 addr
= plus_constant (Pmode
, dst
, i
);
28763 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
28764 emit_move_insn (mem
, reg
);
28770 /* Set a block of memory using vectorization instructions for both
28771 aligned and unaligned cases. We fill the first LENGTH bytes of
28772 the memory area starting from DSTBASE with byte constant VALUE.
28773 ALIGN is the alignment requirement of memory. */
28775 arm_block_set_vect (rtx dstbase
,
28776 unsigned HOST_WIDE_INT length
,
28777 unsigned HOST_WIDE_INT value
,
28778 unsigned HOST_WIDE_INT align
)
28780 /* Check whether we need to use unaligned store instruction. */
28781 if (((align
& 3) != 0 || (length
& 3) != 0)
28782 /* Check whether unaligned store instruction is available. */
28783 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
28786 if ((align
& 3) == 0)
28787 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
28789 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
28792 /* Expand string store operation. Firstly we try to do that by using
28793 vectorization instructions, then try with ARM unaligned access and
28794 double-word store if profitable. OPERANDS[0] is the destination,
28795 OPERANDS[1] is the number of bytes, operands[2] is the value to
28796 initialize the memory, OPERANDS[3] is the known alignment of the
28799 arm_gen_setmem (rtx
*operands
)
28801 rtx dstbase
= operands
[0];
28802 unsigned HOST_WIDE_INT length
;
28803 unsigned HOST_WIDE_INT value
;
28804 unsigned HOST_WIDE_INT align
;
28806 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
28809 length
= UINTVAL (operands
[1]);
28813 value
= (UINTVAL (operands
[2]) & 0xFF);
28814 align
= UINTVAL (operands
[3]);
28815 if (TARGET_NEON
&& length
>= 8
28816 && current_tune
->string_ops_prefer_neon
28817 && arm_block_set_vect (dstbase
, length
, value
, align
))
28820 if (!unaligned_access
&& (align
& 3) != 0)
28821 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
28823 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
28828 arm_macro_fusion_p (void)
28830 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
28833 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
28834 for MOVW / MOVT macro fusion. */
28837 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
28839 /* We are trying to fuse
28840 movw imm / movt imm
28841 instructions as a group that gets scheduled together. */
28843 rtx set_dest
= SET_DEST (curr_set
);
28845 if (GET_MODE (set_dest
) != SImode
)
28848 /* We are trying to match:
28849 prev (movw) == (set (reg r0) (const_int imm16))
28850 curr (movt) == (set (zero_extract (reg r0)
28853 (const_int imm16_1))
28855 prev (movw) == (set (reg r1)
28856 (high (symbol_ref ("SYM"))))
28857 curr (movt) == (set (reg r0)
28859 (symbol_ref ("SYM")))) */
28861 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
28863 if (CONST_INT_P (SET_SRC (curr_set
))
28864 && CONST_INT_P (SET_SRC (prev_set
))
28865 && REG_P (XEXP (set_dest
, 0))
28866 && REG_P (SET_DEST (prev_set
))
28867 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
28871 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
28872 && REG_P (SET_DEST (curr_set
))
28873 && REG_P (SET_DEST (prev_set
))
28874 && GET_CODE (SET_SRC (prev_set
)) == HIGH
28875 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
28882 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
28884 rtx prev_set
= single_set (prev
);
28885 rtx curr_set
= single_set (curr
);
28891 if (any_condjump_p (curr
))
28894 if (!arm_macro_fusion_p ())
28897 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
28898 && aarch_crypto_can_dual_issue (prev
, curr
))
28901 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
28902 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
28908 /* Return true iff the instruction fusion described by OP is enabled. */
28910 arm_fusion_enabled_p (tune_params::fuse_ops op
)
28912 return current_tune
->fusible_ops
& op
;
28915 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
28917 static unsigned HOST_WIDE_INT
28918 arm_asan_shadow_offset (void)
28920 return HOST_WIDE_INT_1U
<< 29;
28924 /* This is a temporary fix for PR60655. Ideally we need
28925 to handle most of these cases in the generic part but
28926 currently we reject minus (..) (sym_ref). We try to
28927 ameliorate the case with minus (sym_ref1) (sym_ref2)
28928 where they are in the same section. */
28931 arm_const_not_ok_for_debug_p (rtx p
)
28933 tree decl_op0
= NULL
;
28934 tree decl_op1
= NULL
;
28936 if (GET_CODE (p
) == MINUS
)
28938 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
28940 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
28942 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
28943 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
28945 if ((VAR_P (decl_op1
)
28946 || TREE_CODE (decl_op1
) == CONST_DECL
)
28947 && (VAR_P (decl_op0
)
28948 || TREE_CODE (decl_op0
) == CONST_DECL
))
28949 return (get_variable_section (decl_op1
, false)
28950 != get_variable_section (decl_op0
, false));
28952 if (TREE_CODE (decl_op1
) == LABEL_DECL
28953 && TREE_CODE (decl_op0
) == LABEL_DECL
)
28954 return (DECL_CONTEXT (decl_op1
)
28955 != DECL_CONTEXT (decl_op0
));
28965 /* return TRUE if x is a reference to a value in a constant pool */
28967 arm_is_constant_pool_ref (rtx x
)
28970 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
28971 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
28974 /* Remember the last target of arm_set_current_function. */
28975 static GTY(()) tree arm_previous_fndecl
;
28977 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
28980 save_restore_target_globals (tree new_tree
)
28982 /* If we have a previous state, use it. */
28983 if (TREE_TARGET_GLOBALS (new_tree
))
28984 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
28985 else if (new_tree
== target_option_default_node
)
28986 restore_target_globals (&default_target_globals
);
28989 /* Call target_reinit and save the state for TARGET_GLOBALS. */
28990 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
28993 arm_option_params_internal ();
28996 /* Invalidate arm_previous_fndecl. */
28999 arm_reset_previous_fndecl (void)
29001 arm_previous_fndecl
= NULL_TREE
;
29004 /* Establish appropriate back-end context for processing the function
29005 FNDECL. The argument might be NULL to indicate processing at top
29006 level, outside of any function scope. */
29009 arm_set_current_function (tree fndecl
)
29011 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
29014 tree old_tree
= (arm_previous_fndecl
29015 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
29018 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
29020 /* If current function has no attributes but previous one did,
29021 use the default node. */
29022 if (! new_tree
&& old_tree
)
29023 new_tree
= target_option_default_node
;
29025 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
29026 the default have been handled by save_restore_target_globals from
29027 arm_pragma_target_parse. */
29028 if (old_tree
== new_tree
)
29031 arm_previous_fndecl
= fndecl
;
29033 /* First set the target options. */
29034 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
29036 save_restore_target_globals (new_tree
);
29039 /* Implement TARGET_OPTION_PRINT. */
29042 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
29044 int flags
= ptr
->x_target_flags
;
29045 const struct arm_fpu_desc
*fpu_desc
= &all_fpus
[ptr
->x_arm_fpu_index
];
29047 fprintf (file
, "%*sselected arch %s\n", indent
, "",
29048 TARGET_THUMB2_P (flags
) ? "thumb2" :
29049 TARGET_THUMB_P (flags
) ? "thumb1" :
29052 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_desc
->name
);
29055 /* Hook to determine if one function can safely inline another. */
29058 arm_can_inline_p (tree caller
, tree callee
)
29060 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
29061 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
29063 struct cl_target_option
*caller_opts
29064 = TREE_TARGET_OPTION (caller_tree
? caller_tree
29065 : target_option_default_node
);
29067 struct cl_target_option
*callee_opts
29068 = TREE_TARGET_OPTION (callee_tree
? callee_tree
29069 : target_option_default_node
);
29071 const struct arm_fpu_desc
*caller_fpu
29072 = &all_fpus
[caller_opts
->x_arm_fpu_index
];
29073 const struct arm_fpu_desc
*callee_fpu
29074 = &all_fpus
[callee_opts
->x_arm_fpu_index
];
29076 /* Callee's fpu features should be a subset of the caller's. */
29077 if ((caller_fpu
->features
& callee_fpu
->features
) != callee_fpu
->features
)
29080 /* Need same FPU regs. */
29081 if (callee_fpu
->regs
!= callee_fpu
->regs
)
29084 /* OK to inline between different modes.
29085 Function with mode specific instructions, e.g using asm,
29086 must be explicitly protected with noinline. */
29090 /* Hook to fix function's alignment affected by target attribute. */
29093 arm_relayout_function (tree fndecl
)
29095 if (DECL_USER_ALIGN (fndecl
))
29098 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
29101 callee_tree
= target_option_default_node
;
29103 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
29104 SET_DECL_ALIGN (fndecl
, FUNCTION_BOUNDARY_P (opts
->x_target_flags
));
29107 /* Inner function to process the attribute((target(...))), take an argument and
29108 set the current options from the argument. If we have a list, recursively
29109 go over the list. */
29112 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
29114 if (TREE_CODE (args
) == TREE_LIST
)
29118 for (; args
; args
= TREE_CHAIN (args
))
29119 if (TREE_VALUE (args
)
29120 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
29125 else if (TREE_CODE (args
) != STRING_CST
)
29127 error ("attribute %<target%> argument not a string");
29131 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
29134 while ((q
= strtok (argstr
, ",")) != NULL
)
29136 while (ISSPACE (*q
)) ++q
;
29139 if (!strncmp (q
, "thumb", 5))
29140 opts
->x_target_flags
|= MASK_THUMB
;
29142 else if (!strncmp (q
, "arm", 3))
29143 opts
->x_target_flags
&= ~MASK_THUMB
;
29145 else if (!strncmp (q
, "fpu=", 4))
29147 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
29148 &opts
->x_arm_fpu_index
, CL_TARGET
))
29150 error ("invalid fpu for attribute(target(\"%s\"))", q
);
29156 error ("attribute(target(\"%s\")) is unknown", q
);
29160 arm_option_check_internal (opts
);
29166 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29169 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
29170 struct gcc_options
*opts_set
)
29172 if (!arm_valid_target_attribute_rec (args
, opts
))
29175 /* Do any overrides, such as global options arch=xxx. */
29176 arm_option_override_internal (opts
, opts_set
);
29178 return build_target_option_node (opts
);
29182 add_attribute (const char * mode
, tree
*attributes
)
29184 size_t len
= strlen (mode
);
29185 tree value
= build_string (len
, mode
);
29187 TREE_TYPE (value
) = build_array_type (char_type_node
,
29188 build_index_type (size_int (len
)));
29190 *attributes
= tree_cons (get_identifier ("target"),
29191 build_tree_list (NULL_TREE
, value
),
29195 /* For testing. Insert thumb or arm modes alternatively on functions. */
29198 arm_insert_attributes (tree fndecl
, tree
* attributes
)
29202 if (! TARGET_FLIP_THUMB
)
29205 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
29206 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
29209 /* Nested definitions must inherit mode. */
29210 if (current_function_decl
)
29212 mode
= TARGET_THUMB
? "thumb" : "arm";
29213 add_attribute (mode
, attributes
);
29217 /* If there is already a setting don't change it. */
29218 if (lookup_attribute ("target", *attributes
) != NULL
)
29221 mode
= thumb_flipper
? "thumb" : "arm";
29222 add_attribute (mode
, attributes
);
29224 thumb_flipper
= !thumb_flipper
;
29227 /* Hook to validate attribute((target("string"))). */
29230 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
29231 tree args
, int ARG_UNUSED (flags
))
29234 struct gcc_options func_options
;
29235 tree cur_tree
, new_optimize
;
29236 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
29238 /* Get the optimization options of the current function. */
29239 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
29241 /* If the function changed the optimization levels as well as setting target
29242 options, start with the optimizations specified. */
29243 if (!func_optimize
)
29244 func_optimize
= optimization_default_node
;
29246 /* Init func_options. */
29247 memset (&func_options
, 0, sizeof (func_options
));
29248 init_options_struct (&func_options
, NULL
);
29249 lang_hooks
.init_options_struct (&func_options
);
29251 /* Initialize func_options to the defaults. */
29252 cl_optimization_restore (&func_options
,
29253 TREE_OPTIMIZATION (func_optimize
));
29255 cl_target_option_restore (&func_options
,
29256 TREE_TARGET_OPTION (target_option_default_node
));
29258 /* Set func_options flags with new target mode. */
29259 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
29260 &global_options_set
);
29262 if (cur_tree
== NULL_TREE
)
29265 new_optimize
= build_optimization_node (&func_options
);
29267 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
29269 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
29271 finalize_options_struct (&func_options
);
29277 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
29280 fprintf (stream
, "\t.syntax unified\n");
29284 if (is_called_in_ARM_mode (decl
)
29285 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
29286 && cfun
->is_thunk
))
29287 fprintf (stream
, "\t.code 32\n");
29288 else if (TARGET_THUMB1
)
29289 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
29291 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
29294 fprintf (stream
, "\t.arm\n");
29296 asm_fprintf (asm_out_file
, "\t.fpu %s\n",
29297 TARGET_SOFT_FLOAT
? "softvfp" : TARGET_FPU_NAME
);
29299 if (TARGET_POKE_FUNCTION_NAME
)
29300 arm_poke_function_name (stream
, (const char *) name
);
29303 /* If MEM is in the form of [base+offset], extract the two parts
29304 of address and set to BASE and OFFSET, otherwise return false
29305 after clearing BASE and OFFSET. */
29308 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
29312 gcc_assert (MEM_P (mem
));
29314 addr
= XEXP (mem
, 0);
29316 /* Strip off const from addresses like (const (addr)). */
29317 if (GET_CODE (addr
) == CONST
)
29318 addr
= XEXP (addr
, 0);
29320 if (GET_CODE (addr
) == REG
)
29323 *offset
= const0_rtx
;
29327 if (GET_CODE (addr
) == PLUS
29328 && GET_CODE (XEXP (addr
, 0)) == REG
29329 && CONST_INT_P (XEXP (addr
, 1)))
29331 *base
= XEXP (addr
, 0);
29332 *offset
= XEXP (addr
, 1);
29337 *offset
= NULL_RTX
;
29342 /* If INSN is a load or store of address in the form of [base+offset],
29343 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
29344 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29345 otherwise return FALSE. */
29348 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
29352 gcc_assert (INSN_P (insn
));
29353 x
= PATTERN (insn
);
29354 if (GET_CODE (x
) != SET
)
29358 dest
= SET_DEST (x
);
29359 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
29362 extract_base_offset_in_addr (dest
, base
, offset
);
29364 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
29367 extract_base_offset_in_addr (src
, base
, offset
);
29372 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
29375 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29377 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
29378 and PRI are only calculated for these instructions. For other instruction,
29379 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
29380 instruction fusion can be supported by returning different priorities.
29382 It's important that irrelevant instructions get the largest FUSION_PRI. */
29385 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
29386 int *fusion_pri
, int *pri
)
29392 gcc_assert (INSN_P (insn
));
29395 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
29402 /* Load goes first. */
29404 *fusion_pri
= tmp
- 1;
29406 *fusion_pri
= tmp
- 2;
29410 /* INSN with smaller base register goes first. */
29411 tmp
-= ((REGNO (base
) & 0xff) << 20);
29413 /* INSN with smaller offset goes first. */
29414 off_val
= (int)(INTVAL (offset
));
29416 tmp
-= (off_val
& 0xfffff);
29418 tmp
+= ((- off_val
) & 0xfffff);
29425 /* Construct and return a PARALLEL RTX vector with elements numbering the
29426 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
29427 the vector - from the perspective of the architecture. This does not
29428 line up with GCC's perspective on lane numbers, so we end up with
29429 different masks depending on our target endian-ness. The diagram
29430 below may help. We must draw the distinction when building masks
29431 which select one half of the vector. An instruction selecting
29432 architectural low-lanes for a big-endian target, must be described using
29433 a mask selecting GCC high-lanes.
29435 Big-Endian Little-Endian
29437 GCC 0 1 2 3 3 2 1 0
29438 | x | x | x | x | | x | x | x | x |
29439 Architecture 3 2 1 0 3 2 1 0
29441 Low Mask: { 2, 3 } { 0, 1 }
29442 High Mask: { 0, 1 } { 2, 3 }
29446 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
29448 int nunits
= GET_MODE_NUNITS (mode
);
29449 rtvec v
= rtvec_alloc (nunits
/ 2);
29450 int high_base
= nunits
/ 2;
29456 if (BYTES_BIG_ENDIAN
)
29457 base
= high
? low_base
: high_base
;
29459 base
= high
? high_base
: low_base
;
29461 for (i
= 0; i
< nunits
/ 2; i
++)
29462 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
29464 t1
= gen_rtx_PARALLEL (mode
, v
);
29468 /* Check OP for validity as a PARALLEL RTX vector with elements
29469 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
29470 from the perspective of the architecture. See the diagram above
29471 arm_simd_vect_par_cnst_half_p for more details. */
29474 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
29477 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
29478 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
29479 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
29482 if (!VECTOR_MODE_P (mode
))
29485 if (count_op
!= count_ideal
)
29488 for (i
= 0; i
< count_ideal
; i
++)
29490 rtx elt_op
= XVECEXP (op
, 0, i
);
29491 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
29493 if (!CONST_INT_P (elt_op
)
29494 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
29500 /* Can output mi_thunk for all cases except for non-zero vcall_offset
29503 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
29506 /* For now, we punt and not handle this for TARGET_THUMB1. */
29507 if (vcall_offset
&& TARGET_THUMB1
)
29510 /* Otherwise ok. */
29514 /* Generate RTL for a conditional branch with rtx comparison CODE in
29515 mode CC_MODE. The destination of the unlikely conditional branch
29519 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
29523 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
29524 gen_rtx_REG (cc_mode
, CC_REGNUM
),
29527 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
29528 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
29530 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
29533 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
29535 For pure-code sections there is no letter code for this attribute, so
29536 output all the section flags numerically when this is needed.
   Translates GCC SECTION_* flags to a numeric ELF sh_flags value in *NUM
   (only the SECTION_EXCLUDE value, 0x80000000, is visible here; the
   values for the other flags were lost in this extraction).  Presumably
   returns true when *NUM was filled in -- TODO confirm against the
   upstream definition.  */
29539 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
/* Only pure-code sections need the numeric form.  */
29542 if (flags
& SECTION_ARM_PURECODE
)
/* Non-debug sections get the SHF_ALLOC-style bits -- value elided here.  */
29546 if (!(flags
& SECTION_DEBUG
))
/* SHF_EXCLUDE.  */
29548 if (flags
& SECTION_EXCLUDE
)
29549 *num
|= 0x80000000;
/* SHF_WRITE -- value elided in this extraction.  */
29550 if (flags
& SECTION_WRITE
)
/* SHF_EXECINSTR -- value elided.  */
29552 if (flags
& SECTION_CODE
)
/* SHF_MERGE -- value elided.  */
29554 if (flags
& SECTION_MERGE
)
/* SHF_STRINGS -- value elided.  */
29556 if (flags
& SECTION_STRINGS
)
/* SHF_TLS -- value elided.  */
29558 if (flags
& SECTION_TLS
)
/* SHF_GROUP, only when COMDAT groups are supported -- value elided.  */
29560 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
29569 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
29571 If pure-code is passed as an option, make sure all functions are in
29572 sections that have the SHF_ARM_PURECODE attribute. */
29575 arm_function_section (tree decl
, enum node_frequency freq
,
29576 bool startup
, bool exit
)
29578 const char * section_name
;
29581 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
29582 return default_function_section (decl
, freq
, startup
, exit
);
29584 if (!target_pure_code
)
29585 return default_function_section (decl
, freq
, startup
, exit
);
29588 section_name
= DECL_SECTION_NAME (decl
);
29590 /* If a function is not in a named section then it falls under the 'default'
29591 text section, also known as '.text'. We can preserve previous behavior as
29592 the default text section already has the SHF_ARM_PURECODE section
29596 section
*default_sec
= default_function_section (decl
, freq
, startup
,
29599 /* If default_sec is not null, then it must be a special section like for
29600 example .text.startup. We set the pure-code attribute and return the
29601 same section to preserve existing behavior. */
29603 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
29604 return default_sec
;
29607 /* Otherwise look whether a section has already been created with
29609 sec
= get_named_section (decl
, section_name
, 0);
29611 /* If that is not the case passing NULL as the section's name to
29612 'get_named_section' will create a section with the declaration's
29614 sec
= get_named_section (decl
, NULL
, 0);
29616 /* Set the SHF_ARM_PURECODE attribute. */
29617 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
29622 /* Implements the TARGET_SECTION_FLAGS hook.
29624 If DECL is a function declaration and pure-code is passed as an option
29625 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
29626 section's name and RELOC indicates whether the declarations initializer may
29627 contain runtime relocations. */
29629 static unsigned int
29630 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
29632 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
29634 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
29635 flags
|= SECTION_ARM_PURECODE
;
29640 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
29643 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
29645 rtx
*quot_p
, rtx
*rem_p
)
29647 if (mode
== SImode
)
29648 gcc_assert (!TARGET_IDIV
);
29650 machine_mode libval_mode
= smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode
),
29653 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
29655 op0
, GET_MODE (op0
),
29656 op1
, GET_MODE (op1
));
29658 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
29659 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
29660 GET_MODE_SIZE (mode
));
29662 gcc_assert (quotient
);
29663 gcc_assert (remainder
);
29665 *quot_p
= quotient
;
29666 *rem_p
= remainder
;
29669 #include "gt-arm.h"